xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 184914b51f506b1e5092fe675369361af1b1aa93)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpiaij.c,v 1.300 1999/09/15 02:05:29 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/aij/mpi/mpiaij.h"
6 #include "src/vec/vecimpl.h"
7 #include "src/inline/spops.h"
8 
9 extern int MatSetUpMultiply_MPIAIJ(Mat);
10 extern int DisAssemble_MPIAIJ(Mat);
11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatPrintHelp_SeqAIJ(Mat);
15 
16 /* local utility routine that creates a mapping from the global column
17 number to the local number in the off-diagonal part of the local
18 storage of the matrix.  This is done in a non scable way since the
19 length of colmap equals the global matrix length.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
26   Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
27   int        n = B->n,i,ierr;
28 
29   PetscFunctionBegin;
30 #if defined (PETSC_USE_CTABLE)
31   ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
32   for ( i=0; i<n; i++ ){
33     ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
34   }
35 #else
36   aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
37   PLogObjectMemory(mat,aij->N*sizeof(int));
38   ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
39   for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
40 #endif
41   PetscFunctionReturn(0);
42 }
43 
44 #define CHUNKSIZE   15
45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
46 { \
47  \
48     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
49     rmax = aimax[row]; nrow = ailen[row];  \
50     col1 = col - shift; \
51      \
52     low = 0; high = nrow; \
53     while (high-low > 5) { \
54       t = (low+high)/2; \
55       if (rp[t] > col) high = t; \
56       else             low  = t; \
57     } \
58       for ( _i=0; _i<nrow; _i++ ) { \
59         if (rp[_i] > col1) break; \
60         if (rp[_i] == col1) { \
61           if (addv == ADD_VALUES) ap[_i] += value;   \
62           else                  ap[_i] = value; \
63           goto a_noinsert; \
64         } \
65       }  \
66       if (nonew == 1) goto a_noinsert; \
67       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
68       if (nrow >= rmax) { \
69         /* there is no extra room in row, therefore enlarge */ \
70         int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
71         Scalar *new_a; \
72  \
73         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
74  \
75         /* malloc new storage space */ \
76         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
77         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
78         new_j   = (int *) (new_a + new_nz); \
79         new_i   = new_j + new_nz; \
80  \
81         /* copy over old data into new slots */ \
82         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
83         for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
84         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
85         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
86         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
87                                                            len*sizeof(int));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
89         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
90                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
91         /* free up old matrix storage */ \
92  \
93         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
94         if (!a->singlemalloc) { \
95            ierr = PetscFree(a->i);CHKERRQ(ierr); \
96            ierr = PetscFree(a->j);CHKERRQ(ierr); \
97         } \
98         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
99         a->singlemalloc = 1; \
100  \
101         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
102         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
103         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
104         a->maxnz += CHUNKSIZE; \
105         a->reallocs++; \
106       } \
107       N = nrow++ - 1; a->nz++; \
108       /* shift up all the later entries in this row */ \
109       for ( ii=N; ii>=_i; ii-- ) { \
110         rp[ii+1] = rp[ii]; \
111         ap[ii+1] = ap[ii]; \
112       } \
113       rp[_i] = col1;  \
114       ap[_i] = value;  \
115       a_noinsert: ; \
116       ailen[row] = nrow; \
117 }
118 
119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
120 { \
121  \
122     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
123     rmax = bimax[row]; nrow = bilen[row];  \
124     col1 = col - shift; \
125      \
126     low = 0; high = nrow; \
127     while (high-low > 5) { \
128       t = (low+high)/2; \
129       if (rp[t] > col) high = t; \
130       else             low  = t; \
131     } \
132        for ( _i=0; _i<nrow; _i++ ) { \
133         if (rp[_i] > col1) break; \
134         if (rp[_i] == col1) { \
135           if (addv == ADD_VALUES) ap[_i] += value;   \
136           else                  ap[_i] = value; \
137           goto b_noinsert; \
138         } \
139       }  \
140       if (nonew == 1) goto b_noinsert; \
141       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
142       if (nrow >= rmax) { \
143         /* there is no extra room in row, therefore enlarge */ \
144         int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
145         Scalar *new_a; \
146  \
147         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
148  \
149         /* malloc new storage space */ \
150         len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
151         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
152         new_j   = (int *) (new_a + new_nz); \
153         new_i   = new_j + new_nz; \
154  \
155         /* copy over old data into new slots */ \
156         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
157         for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
158         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
159         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
160         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
161                                                            len*sizeof(int));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
163         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
164                                                            len*sizeof(Scalar));CHKERRQ(ierr);  \
165         /* free up old matrix storage */ \
166  \
167         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
168         if (!b->singlemalloc) { \
169           ierr = PetscFree(b->i);CHKERRQ(ierr); \
170           ierr = PetscFree(b->j);CHKERRQ(ierr); \
171         } \
172         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
173         b->singlemalloc = 1; \
174  \
175         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
176         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
177         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
178         b->maxnz += CHUNKSIZE; \
179         b->reallocs++; \
180       } \
181       N = nrow++ - 1; b->nz++; \
182       /* shift up all the later entries in this row */ \
183       for ( ii=N; ii>=_i; ii-- ) { \
184         rp[ii+1] = rp[ii]; \
185         ap[ii+1] = ap[ii]; \
186       } \
187       rp[_i] = col1;  \
188       ap[_i] = value;  \
189       b_noinsert: ; \
190       bilen[row] = nrow; \
191 }
192 
193 #undef __FUNC__
194 #define __FUNC__ "MatSetValues_MPIAIJ"
195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
196 {
197   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
198   Scalar     value;
199   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
200   int        cstart = aij->cstart, cend = aij->cend,row,col;
201   int        roworiented = aij->roworiented;
202 
203   /* Some Variables required in the macro */
204   Mat        A = aij->A;
205   Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
206   int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
207   Scalar     *aa = a->a;
208 
209   Mat        B = aij->B;
210   Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
211   int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
212   Scalar     *ba = b->a;
213 
214   int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
215   int        nonew = a->nonew,shift = a->indexshift;
216   Scalar     *ap;
217 
218   PetscFunctionBegin;
219   for ( i=0; i<m; i++ ) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_BOPT_g)
222     if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
223 #endif
224     if (im[i] >= rstart && im[i] < rend) {
225       row = im[i] - rstart;
226       for ( j=0; j<n; j++ ) {
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233         else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_BOPT_g)
235         else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!aij->colmap) {
240               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
244 	    col--;
245 #else
246             col = aij->colmap[in[j]] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
249               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
252               B = aij->B;
253               b = (Mat_SeqAIJ *) B->data;
254               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
255               ba = b->a;
256             }
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNC__
277 #define __FUNC__ "MatGetValues_MPIAIJ"
278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
279 {
280   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
281   int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
282   int        cstart = aij->cstart, cend = aij->cend,row,col;
283 
284   PetscFunctionBegin;
285   for ( i=0; i<m; i++ ) {
286     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
287     if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
288     if (idxm[i] >= rstart && idxm[i] < rend) {
289       row = idxm[i] - rstart;
290       for ( j=0; j<n; j++ ) {
291         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
292         if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
293         if (idxn[j] >= cstart && idxn[j] < cend){
294           col = idxn[j] - cstart;
295           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
296         } else {
297           if (!aij->colmap) {
298             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
299           }
300 #if defined (PETSC_USE_CTABLE)
301           ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
302           col --;
303 #else
304           col = aij->colmap[idxn[j]] - 1;
305 #endif
306           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
307           else {
308             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
309           }
310         }
311       }
312     } else {
313       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ"
321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
322 {
323   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
324   int         ierr,nstash,reallocs;
325   InsertMode  addv;
326 
327   PetscFunctionBegin;
328   if (aij->donotstash) {
329     PetscFunctionReturn(0);
330   }
331 
332   /* make sure all processors are either in INSERTMODE or ADDMODE */
333   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
334   if (addv == (ADD_VALUES|INSERT_VALUES)) {
335     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
336   }
337   mat->insertmode = addv; /* in case this processor had no cache */
338 
339   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
340   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
341   PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
342   PetscFunctionReturn(0);
343 }
344 
345 
346 #undef __FUNC__
347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ"
348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
349 {
350   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
351   int         i,j,rstart,ncols,n,ierr,flg;
352   int         *row,*col,other_disassembled;
353   Scalar      *val;
354   InsertMode  addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for ( i=0; i<n; ) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled, if so we must
379      also disassemble ourselfs, in order that we may reassemble. */
380   /*
381      if nonzero structure of submatrix B cannot change then we know that
382      no processor disassembled thus we can skip this stuff
383   */
384   if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388     }
389   }
390 
391   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
392     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
393   }
394   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
396 
397   if (aij->rowvalues) {
398     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
399     aij->rowvalues = 0;
400   }
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNC__
405 #define __FUNC__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNC__
418 #define __FUNC__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
422   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
423   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431 
432   PetscFunctionBegin;
433   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
434   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
435 
436   /*  first count number of contributors to each processor */
437   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
438   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
439   procs  = nprocs + size;
440   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
441   for ( i=0; i<N; i++ ) {
442     idx = rows[i];
443     found = 0;
444     for ( j=0; j<size; j++ ) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
450   }
451   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
452 
453   /* inform other processors of number of messages and max length*/
454   work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
455   ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
456   nrecvs = work[rank];
457   ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
458   nmax = work[rank];
459   ierr = PetscFree(work);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
463   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
464   for ( i=0; i<nrecvs; i++ ) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues for stuff going to
470          the ith processor
471   */
472   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
473   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
474   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
475   starts[0] = 0;
476   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
477   for ( i=0; i<N; i++ ) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ISRestoreIndices(is,&rows);
481 
482   starts[0] = 0;
483   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
484   count = 0;
485   for ( i=0; i<size; i++ ) {
486     if (procs[i]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]  = n;
504     slen += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
511   count = 0;
512   for ( i=0; i<nrecvs; i++ ) {
513     values = rvalues + i*nmax;
514     for ( j=0; j<lens[i]; j++ ) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal we use seperate
530      code so that MatSetValues() is not called for each diagonal allocating
531      new memory, thus calling lots of mallocs and slowing things down.
532 
533        Contributed by: Mathew Knepley
534   */
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for ( i = 0; i < slen; i++ ) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
560     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNC__
570 #define __FUNC__ "MatMult_MPIAIJ"
571 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
574   int        ierr,nt;
575 
576   PetscFunctionBegin;
577   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
578   if (nt != a->n) {
579     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
580   }
581   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
582   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
583   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
584   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNC__
589 #define __FUNC__ "MatMultAdd_MPIAIJ"
590 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
591 {
592   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
593   int        ierr;
594 
595   PetscFunctionBegin;
596   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
597   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
598   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatMultTrans_MPIAIJ"
605 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
606 {
607   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
608   int        ierr;
609 
610   PetscFunctionBegin;
611   /* do nondiagonal part */
612   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
613   /* send it on its way */
614   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
615   /* do local part */
616   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
617   /* receive remote parts: note this assumes the values are not actually */
618   /* inserted in yy until the next line, which is true for my implementation*/
619   /* but is not perhaps always true. */
620   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNC__
625 #define __FUNC__ "MatMultTransAdd_MPIAIJ"
626 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
627 {
628   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
629   int        ierr;
630 
631   PetscFunctionBegin;
632   /* do nondiagonal part */
633   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
634   /* send it on its way */
635   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
636   /* do local part */
637   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
638   /* receive remote parts: note this assumes the values are not actually */
639   /* inserted in yy until the next line, which is true for my implementation*/
640   /* but is not perhaps always true. */
641   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
642   PetscFunctionReturn(0);
643 }
644 
645 /*
646   This only works correctly for square matrices where the subblock A->A is the
647    diagonal block
648 */
649 #undef __FUNC__
650 #define __FUNC__ "MatGetDiagonal_MPIAIJ"
651 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
652 {
653   int        ierr;
654   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
655 
656   PetscFunctionBegin;
657   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
658   if (a->rstart != a->cstart || a->rend != a->cend) {
659     SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
660   }
661   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNC__
666 #define __FUNC__ "MatScale_MPIAIJ"
667 int MatScale_MPIAIJ(Scalar *aa,Mat A)
668 {
669   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
670   int        ierr;
671 
672   PetscFunctionBegin;
673   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
674   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
675   PetscFunctionReturn(0);
676 }
677 
678 #undef __FUNC__
679 #define __FUNC__ "MatDestroy_MPIAIJ"
680 int MatDestroy_MPIAIJ(Mat mat)
681 {
682   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
683   int        ierr;
684 
685   PetscFunctionBegin;
686 
687   if (mat->mapping) {
688     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
689   }
690   if (mat->bmapping) {
691     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
692   }
693   if (mat->rmap) {
694     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
695   }
696   if (mat->cmap) {
697     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
698   }
699 #if defined(PETSC_USE_LOG)
700   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
701 #endif
702   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
703   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
704   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
705   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
706 #if defined (PETSC_USE_CTABLE)
707   if (aij->colmap) TableDelete(aij->colmap);
708 #else
709   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
710 #endif
711   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
712   if (aij->lvec)   VecDestroy(aij->lvec);
713   if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
714   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
715   ierr = PetscFree(aij);CHKERRQ(ierr);
716   PLogObjectDestroy(mat);
717   PetscHeaderDestroy(mat);
718   PetscFunctionReturn(0);
719 }
720 
721 #undef __FUNC__
722 #define __FUNC__ "MatView_MPIAIJ_Binary"
723 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
724 {
725   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
726   int         ierr;
727 
728   PetscFunctionBegin;
729   if (aij->size == 1) {
730     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
731   }
732   else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
733   PetscFunctionReturn(0);
734 }
735 
736 #undef __FUNC__
737 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
738 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
739 {
740   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
741   Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
742   int         ierr, format,shift = C->indexshift,rank = aij->rank ;
743   int         size = aij->size;
744   FILE        *fd;
745   ViewerType  vtype;
746 
747   PetscFunctionBegin;
748   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
749   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
750     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
751     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
752       MatInfo info;
753       int     flg;
754       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755       ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
756       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
757       ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
758       PetscSequentialPhaseBegin(mat->comm,1);
759       if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
760          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
761       else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
762          rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
763       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
764       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
765       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
766       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
767       fflush(fd);
768       PetscSequentialPhaseEnd(mat->comm,1);
769       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
770       PetscFunctionReturn(0);
771     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
772       PetscFunctionReturn(0);
773     }
774   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
775     Draw       draw;
776     PetscTruth isnull;
777     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
778     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
779   }
780 
781   if (size == 1) {
782     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
783   } else {
784     /* assemble the entire matrix onto first processor. */
785     Mat         A;
786     Mat_SeqAIJ *Aloc;
787     int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
788     Scalar      *a;
789 
790     if (!rank) {
791       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
792     } else {
793       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
794     }
795     PLogObjectParent(mat,A);
796 
797     /* copy over the A part */
798     Aloc = (Mat_SeqAIJ*) aij->A->data;
799     m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
800     row = aij->rstart;
801     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
802     for ( i=0; i<m; i++ ) {
803       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
804       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
805     }
806     aj = Aloc->j;
807     for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}
808 
809     /* copy over the B part */
810     Aloc = (Mat_SeqAIJ*) aij->B->data;
811     m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
812     row = aij->rstart;
813     ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
814     for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
815     for ( i=0; i<m; i++ ) {
816       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
817       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
818     }
819     ierr = PetscFree(ct);CHKERRQ(ierr);
820     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
821     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     /*
823        Everyone has to call to draw the matrix since the graphics waits are
824        synchronized across all processors that share the Draw object
825     */
826     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
827       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
828     }
829     ierr = MatDestroy(A);CHKERRQ(ierr);
830   }
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNC__
835 #define __FUNC__ "MatView_MPIAIJ"
836 int MatView_MPIAIJ(Mat mat,Viewer viewer)
837 {
838   int         ierr;
839   ViewerType  vtype;
840 
841   PetscFunctionBegin;
842   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
843   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
844       PetscTypeCompare(vtype,SOCKET_VIEWER) || PetscTypeCompare(vtype,BINARY_VIEWER)) {
845     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
846     /*
847   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
848     ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
849     */
850   } else {
851     SETERRQ(1,1,"Viewer type not supported by PETSc object");
852   }
853   PetscFunctionReturn(0);
854 }
855 
856 /*
857     This has to provide several versions.
858 
859      2) a) use only local smoothing updating outer values only once.
860         b) local smoothing updating outer values each inner iteration
861      3) color updating out values betwen colors.
862 */
863 #undef __FUNC__
864 #define __FUNC__ "MatRelax_MPIAIJ"
865 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
866                            double fshift,int its,Vec xx)
867 {
868   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
869   Mat        AA = mat->A, BB = mat->B;
870   Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
871   Scalar     *b,*x,*xs,*ls,d,*v,sum;
872   int        ierr,*idx, *diag;
873   int        n = mat->n, m = mat->m, i,shift = A->indexshift;
874 
875   PetscFunctionBegin;
876   if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
877   diag = A->diag;
878   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
879     if (flag & SOR_ZERO_INITIAL_GUESS) {
880       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
881       PetscFunctionReturn(0);
882     }
883     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
884     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
885     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
886     if (xx != bb) {
887       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
888     } else {
889       b = x;
890     }
891     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
892     xs = x + shift; /* shift by one for index start of 1 */
893     ls = ls + shift;
894     while (its--) {
895       /* go down through the rows */
896       for ( i=0; i<m; i++ ) {
897         n    = A->i[i+1] - A->i[i];
898 	PLogFlops(4*n+3);
899         idx  = A->j + A->i[i] + shift;
900         v    = A->a + A->i[i] + shift;
901         sum  = b[i];
902         SPARSEDENSEMDOT(sum,xs,v,idx,n);
903         d    = fshift + A->a[diag[i]+shift];
904         n    = B->i[i+1] - B->i[i];
905         idx  = B->j + B->i[i] + shift;
906         v    = B->a + B->i[i] + shift;
907         SPARSEDENSEMDOT(sum,ls,v,idx,n);
908         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
909       }
910       /* come up through the rows */
911       for ( i=m-1; i>-1; i-- ) {
912         n    = A->i[i+1] - A->i[i];
913 	PLogFlops(4*n+3)
914         idx  = A->j + A->i[i] + shift;
915         v    = A->a + A->i[i] + shift;
916         sum  = b[i];
917         SPARSEDENSEMDOT(sum,xs,v,idx,n);
918         d    = fshift + A->a[diag[i]+shift];
919         n    = B->i[i+1] - B->i[i];
920         idx  = B->j + B->i[i] + shift;
921         v    = B->a + B->i[i] + shift;
922         SPARSEDENSEMDOT(sum,ls,v,idx,n);
923         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
924       }
925     }
926     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
927     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
928     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
929   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
930     if (flag & SOR_ZERO_INITIAL_GUESS) {
931       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
932       PetscFunctionReturn(0);
933     }
934     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
935     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
936     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
937     if (xx != bb) {
938       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
939     } else {
940       b = x;
941     }
942     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
943     xs = x + shift; /* shift by one for index start of 1 */
944     ls = ls + shift;
945     while (its--) {
946       for ( i=0; i<m; i++ ) {
947         n    = A->i[i+1] - A->i[i];
948 	PLogFlops(4*n+3);
949         idx  = A->j + A->i[i] + shift;
950         v    = A->a + A->i[i] + shift;
951         sum  = b[i];
952         SPARSEDENSEMDOT(sum,xs,v,idx,n);
953         d    = fshift + A->a[diag[i]+shift];
954         n    = B->i[i+1] - B->i[i];
955         idx  = B->j + B->i[i] + shift;
956         v    = B->a + B->i[i] + shift;
957         SPARSEDENSEMDOT(sum,ls,v,idx,n);
958         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
959       }
960     }
961     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
962     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
963     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
964   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
965     if (flag & SOR_ZERO_INITIAL_GUESS) {
966       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
967       PetscFunctionReturn(0);
968     }
969     ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
970     ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
971     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
972     if (xx != bb) {
973       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
974     } else {
975       b = x;
976     }
977     ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
978     xs = x + shift; /* shift by one for index start of 1 */
979     ls = ls + shift;
980     while (its--) {
981       for ( i=m-1; i>-1; i-- ) {
982         n    = A->i[i+1] - A->i[i];
983 	PLogFlops(4*n+3);
984         idx  = A->j + A->i[i] + shift;
985         v    = A->a + A->i[i] + shift;
986         sum  = b[i];
987         SPARSEDENSEMDOT(sum,xs,v,idx,n);
988         d    = fshift + A->a[diag[i]+shift];
989         n    = B->i[i+1] - B->i[i];
990         idx  = B->j + B->i[i] + shift;
991         v    = B->a + B->i[i] + shift;
992         SPARSEDENSEMDOT(sum,ls,v,idx,n);
993         x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
994       }
995     }
996     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
997     if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); }
998     ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
999   } else {
1000     SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
1001   }
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNC__
1006 #define __FUNC__ "MatGetInfo_MPIAIJ"
1007 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1008 {
1009   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1010   Mat        A = mat->A, B = mat->B;
1011   int        ierr;
1012   double     isend[5], irecv[5];
1013 
1014   PetscFunctionBegin;
1015   info->block_size     = 1.0;
1016   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1017   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1018   isend[3] = info->memory;  isend[4] = info->mallocs;
1019   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1020   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1021   isend[3] += info->memory;  isend[4] += info->mallocs;
1022   if (flag == MAT_LOCAL) {
1023     info->nz_used      = isend[0];
1024     info->nz_allocated = isend[1];
1025     info->nz_unneeded  = isend[2];
1026     info->memory       = isend[3];
1027     info->mallocs      = isend[4];
1028   } else if (flag == MAT_GLOBAL_MAX) {
1029     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1030     info->nz_used      = irecv[0];
1031     info->nz_allocated = irecv[1];
1032     info->nz_unneeded  = irecv[2];
1033     info->memory       = irecv[3];
1034     info->mallocs      = irecv[4];
1035   } else if (flag == MAT_GLOBAL_SUM) {
1036     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1037     info->nz_used      = irecv[0];
1038     info->nz_allocated = irecv[1];
1039     info->nz_unneeded  = irecv[2];
1040     info->memory       = irecv[3];
1041     info->mallocs      = irecv[4];
1042   }
1043   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1044   info->fill_ratio_needed = 0;
1045   info->factor_mallocs    = 0;
1046   info->rows_global       = (double)mat->M;
1047   info->columns_global    = (double)mat->N;
1048   info->rows_local        = (double)mat->m;
1049   info->columns_local     = (double)mat->N;
1050 
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNC__
1055 #define __FUNC__ "MatSetOption_MPIAIJ"
1056 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1057 {
1058   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1059   int        ierr;
1060 
1061   PetscFunctionBegin;
1062   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1063       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1064       op == MAT_COLUMNS_UNSORTED ||
1065       op == MAT_COLUMNS_SORTED ||
1066       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1067       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1068         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1069         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1070   } else if (op == MAT_ROW_ORIENTED) {
1071     a->roworiented = 1;
1072     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1073     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1074   } else if (op == MAT_ROWS_SORTED ||
1075              op == MAT_ROWS_UNSORTED ||
1076              op == MAT_SYMMETRIC ||
1077              op == MAT_STRUCTURALLY_SYMMETRIC ||
1078              op == MAT_YES_NEW_DIAGONALS)
1079     PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1080   else if (op == MAT_COLUMN_ORIENTED) {
1081     a->roworiented = 0;
1082     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1083     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1084   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1085     a->donotstash = 1;
1086   } else if (op == MAT_NO_NEW_DIAGONALS){
1087     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1088   } else {
1089     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1090   }
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNC__
1095 #define __FUNC__ "MatGetSize_MPIAIJ"
1096 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
1097 {
1098   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1099 
1100   PetscFunctionBegin;
1101   if (m) *m = mat->M;
1102   if (n) *n = mat->N;
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNC__
1107 #define __FUNC__ "MatGetLocalSize_MPIAIJ"
1108 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
1109 {
1110   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1111 
1112   PetscFunctionBegin;
1113   if (m) *m = mat->m;
1114   if (n) *n = mat->n;
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNC__
1119 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
1120 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
1121 {
1122   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1123 
1124   PetscFunctionBegin;
1125   *m = mat->rstart; *n = mat->rend;
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNC__
1130 #define __FUNC__ "MatGetRow_MPIAIJ"
1131 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1132 {
1133   Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
1134   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1135   int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
1136   int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
1137   int        *cmap, *idx_p;
1138 
1139   PetscFunctionBegin;
1140   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1141   mat->getrowactive = PETSC_TRUE;
1142 
1143   if (!mat->rowvalues && (idx || v)) {
1144     /*
1145         allocate enough space to hold information from the longest row.
1146     */
1147     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
1148     int     max = 1,m = mat->m,tmp;
1149     for ( i=0; i<m; i++ ) {
1150       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1151       if (max < tmp) { max = tmp; }
1152     }
1153     mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1154     mat->rowindices = (int *) (mat->rowvalues + max);
1155   }
1156 
1157   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
1158   lrow = row - rstart;
1159 
1160   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1161   if (!v)   {pvA = 0; pvB = 0;}
1162   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1163   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1164   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1165   nztot = nzA + nzB;
1166 
1167   cmap  = mat->garray;
1168   if (v  || idx) {
1169     if (nztot) {
1170       /* Sort by increasing column numbers, assuming A and B already sorted */
1171       int imark = -1;
1172       if (v) {
1173         *v = v_p = mat->rowvalues;
1174         for ( i=0; i<nzB; i++ ) {
1175           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1176           else break;
1177         }
1178         imark = i;
1179         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1180         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1181       }
1182       if (idx) {
1183         *idx = idx_p = mat->rowindices;
1184         if (imark > -1) {
1185           for ( i=0; i<imark; i++ ) {
1186             idx_p[i] = cmap[cworkB[i]];
1187           }
1188         } else {
1189           for ( i=0; i<nzB; i++ ) {
1190             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1191             else break;
1192           }
1193           imark = i;
1194         }
1195         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
1196         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
1197       }
1198     } else {
1199       if (idx) *idx = 0;
1200       if (v)   *v   = 0;
1201     }
1202   }
1203   *nz = nztot;
1204   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1205   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 #undef __FUNC__
1210 #define __FUNC__ "MatRestoreRow_MPIAIJ"
1211 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1212 {
1213   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1214 
1215   PetscFunctionBegin;
1216   if (aij->getrowactive == PETSC_FALSE) {
1217     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1218   }
1219   aij->getrowactive = PETSC_FALSE;
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 #undef __FUNC__
1224 #define __FUNC__ "MatNorm_MPIAIJ"
1225 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
1226 {
1227   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1228   Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
1229   int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
1230   double     sum = 0.0;
1231   Scalar     *v;
1232 
1233   PetscFunctionBegin;
1234   if (aij->size == 1) {
1235     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1236   } else {
1237     if (type == NORM_FROBENIUS) {
1238       v = amat->a;
1239       for (i=0; i<amat->nz; i++ ) {
1240 #if defined(PETSC_USE_COMPLEX)
1241         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1242 #else
1243         sum += (*v)*(*v); v++;
1244 #endif
1245       }
1246       v = bmat->a;
1247       for (i=0; i<bmat->nz; i++ ) {
1248 #if defined(PETSC_USE_COMPLEX)
1249         sum += PetscReal(PetscConj(*v)*(*v)); v++;
1250 #else
1251         sum += (*v)*(*v); v++;
1252 #endif
1253       }
1254       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1255       *norm = sqrt(*norm);
1256     } else if (type == NORM_1) { /* max column norm */
1257       double *tmp, *tmp2;
1258       int    *jj, *garray = aij->garray;
1259       tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
1260       tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
1261       ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
1262       *norm = 0.0;
1263       v = amat->a; jj = amat->j;
1264       for ( j=0; j<amat->nz; j++ ) {
1265         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1266       }
1267       v = bmat->a; jj = bmat->j;
1268       for ( j=0; j<bmat->nz; j++ ) {
1269         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1270       }
1271       ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
1272       for ( j=0; j<aij->N; j++ ) {
1273         if (tmp2[j] > *norm) *norm = tmp2[j];
1274       }
1275       ierr = PetscFree(tmp);CHKERRQ(ierr);
1276       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1277     } else if (type == NORM_INFINITY) { /* max row norm */
1278       double ntemp = 0.0;
1279       for ( j=0; j<amat->m; j++ ) {
1280         v = amat->a + amat->i[j] + shift;
1281         sum = 0.0;
1282         for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
1283           sum += PetscAbsScalar(*v); v++;
1284         }
1285         v = bmat->a + bmat->i[j] + shift;
1286         for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
1287           sum += PetscAbsScalar(*v); v++;
1288         }
1289         if (sum > ntemp) ntemp = sum;
1290       }
1291       ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
1292     } else {
1293       SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
1294     }
1295   }
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNC__
1300 #define __FUNC__ "MatTranspose_MPIAIJ"
1301 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1302 {
1303   Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
1304   Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
1305   int        ierr,shift = Aloc->indexshift;
1306   int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
1307   Mat        B;
1308   Scalar     *array;
1309 
1310   PetscFunctionBegin;
1311   if (matout == PETSC_NULL && M != N) {
1312     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1313   }
1314 
1315   ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1316 
1317   /* copy over the A part */
1318   Aloc = (Mat_SeqAIJ*) a->A->data;
1319   m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1320   row = a->rstart;
1321   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
1322   for ( i=0; i<m; i++ ) {
1323     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1324     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1325   }
1326   aj = Aloc->j;
1327   for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}
1328 
1329   /* copy over the B part */
1330   Aloc = (Mat_SeqAIJ*) a->B->data;
1331   m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1332   row = a->rstart;
1333   ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
1334   for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
1335   for ( i=0; i<m; i++ ) {
1336     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1337     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1338   }
1339   ierr = PetscFree(ct);CHKERRQ(ierr);
1340   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1341   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1342   if (matout != PETSC_NULL) {
1343     *matout = B;
1344   } else {
1345     PetscOps       *Abops;
1346     struct _MatOps *Aops;
1347 
1348     /* This isn't really an in-place transpose .... but free data structures from a */
1349     ierr = PetscFree(a->rowners);CHKERRQ(ierr);
1350     ierr = MatDestroy(a->A);CHKERRQ(ierr);
1351     ierr = MatDestroy(a->B);CHKERRQ(ierr);
1352 #if defined (PETSC_USE_CTABLE)
1353     if (a->colmap) TableDelete(a->colmap);
1354 #else
1355     if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
1356 #endif
1357     if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
1358     if (a->lvec) VecDestroy(a->lvec);
1359     if (a->Mvctx) VecScatterDestroy(a->Mvctx);
1360     ierr = PetscFree(a);CHKERRQ(ierr);
1361 
1362     /*
1363        This is horrible, horrible code. We need to keep the
1364       A pointers for the bops and ops but copy everything
1365       else from C.
1366     */
1367     Abops   = A->bops;
1368     Aops    = A->ops;
1369     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1370     A->bops = Abops;
1371     A->ops  = Aops;
1372     PetscHeaderDestroy(B);
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNC__
1378 #define __FUNC__ "MatDiagonalScale_MPIAIJ"
1379 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1380 {
1381   Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
1382   Mat        a = aij->A, b = aij->B;
1383   int        ierr,s1,s2,s3;
1384 
1385   PetscFunctionBegin;
1386   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1387   if (rr) {
1388     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1389     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1390     /* Overlap communication with computation. */
1391     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1392   }
1393   if (ll) {
1394     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1395     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1396     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1397   }
1398   /* scale  the diagonal block */
1399   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1400 
1401   if (rr) {
1402     /* Do a scatter end and then right scale the off-diagonal block */
1403     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1404     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1405   }
1406 
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 
1411 #undef __FUNC__
1412 #define __FUNC__ "MatPrintHelp_MPIAIJ"
1413 int MatPrintHelp_MPIAIJ(Mat A)
1414 {
1415   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1416   int        ierr;
1417 
1418   PetscFunctionBegin;
1419   if (!a->rank) {
1420     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNC__
1426 #define __FUNC__ "MatGetBlockSize_MPIAIJ"
1427 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1428 {
1429   PetscFunctionBegin;
1430   *bs = 1;
1431   PetscFunctionReturn(0);
1432 }
1433 #undef __FUNC__
1434 #define __FUNC__ "MatSetUnfactored_MPIAIJ"
1435 int MatSetUnfactored_MPIAIJ(Mat A)
1436 {
1437   Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
1438   int        ierr;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNC__
1446 #define __FUNC__ "MatEqual_MPIAIJ"
1447 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
1448 {
1449   Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
1450   Mat        a, b, c, d;
1451   PetscTruth flg;
1452   int        ierr;
1453 
1454   PetscFunctionBegin;
1455   if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1456   a = matA->A; b = matA->B;
1457   c = matB->A; d = matB->B;
1458 
1459   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1460   if (flg == PETSC_TRUE) {
1461     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1462   }
1463   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNC__
1468 #define __FUNC__ "MatCopy_MPIAIJ"
1469 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1470 {
1471   int        ierr;
1472   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1473   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1474 
1475   PetscFunctionBegin;
1476   if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
1477     /* because of the column compression in the off-processor part of the matrix a->B,
1478        the number of columns in a->B and b->B may be different, hence we cannot call
1479        the MatCopy() directly on the two parts. If need be, we can provide a more
1480        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1481        then copying the submatrices */
1482     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1483   } else {
1484     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1485     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1486   }
1487   PetscFunctionReturn(0);
1488 }
1489 
1490 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1491 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
1492 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1493 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
1494 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);
1495 
1496 /* -------------------------------------------------------------------*/
1497 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1498        MatGetRow_MPIAIJ,
1499        MatRestoreRow_MPIAIJ,
1500        MatMult_MPIAIJ,
1501        MatMultAdd_MPIAIJ,
1502        MatMultTrans_MPIAIJ,
1503        MatMultTransAdd_MPIAIJ,
1504        0,
1505        0,
1506        0,
1507        0,
1508        0,
1509        0,
1510        MatRelax_MPIAIJ,
1511        MatTranspose_MPIAIJ,
1512        MatGetInfo_MPIAIJ,
1513        MatEqual_MPIAIJ,
1514        MatGetDiagonal_MPIAIJ,
1515        MatDiagonalScale_MPIAIJ,
1516        MatNorm_MPIAIJ,
1517        MatAssemblyBegin_MPIAIJ,
1518        MatAssemblyEnd_MPIAIJ,
1519        0,
1520        MatSetOption_MPIAIJ,
1521        MatZeroEntries_MPIAIJ,
1522        MatZeroRows_MPIAIJ,
1523        0,
1524        0,
1525        0,
1526        0,
1527        MatGetSize_MPIAIJ,
1528        MatGetLocalSize_MPIAIJ,
1529        MatGetOwnershipRange_MPIAIJ,
1530        0,
1531        0,
1532        0,
1533        0,
1534        MatDuplicate_MPIAIJ,
1535        0,
1536        0,
1537        0,
1538        0,
1539        0,
1540        MatGetSubMatrices_MPIAIJ,
1541        MatIncreaseOverlap_MPIAIJ,
1542        MatGetValues_MPIAIJ,
1543        MatCopy_MPIAIJ,
1544        MatPrintHelp_MPIAIJ,
1545        MatScale_MPIAIJ,
1546        0,
1547        0,
1548        0,
1549        MatGetBlockSize_MPIAIJ,
1550        0,
1551        0,
1552        0,
1553        0,
1554        MatFDColoringCreate_MPIAIJ,
1555        0,
1556        MatSetUnfactored_MPIAIJ,
1557        0,
1558        0,
1559        MatGetSubMatrix_MPIAIJ,
1560        0,
1561        0,
1562        MatGetMaps_Petsc};
1563 
1564 /* ----------------------------------------------------------------------------------------*/
1565 
1566 EXTERN_C_BEGIN
1567 #undef __FUNC__
1568 #define __FUNC__ "MatStoreValues_MPIAIJ"
1569 int MatStoreValues_MPIAIJ(Mat mat)
1570 {
1571   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1572   int        ierr;
1573 
1574   PetscFunctionBegin;
1575   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1576   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 EXTERN_C_END
1580 
1581 EXTERN_C_BEGIN
1582 #undef __FUNC__
1583 #define __FUNC__ "MatRetrieveValues_MPIAIJ"
1584 int MatRetrieveValues_MPIAIJ(Mat mat)
1585 {
1586   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1587   int        ierr;
1588 
1589   PetscFunctionBegin;
1590   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1591   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1592   PetscFunctionReturn(0);
1593 }
1594 EXTERN_C_END
1595 
1596 #include "pc.h"
1597 EXTERN_C_BEGIN
1598 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1599 EXTERN_C_END
1600 
1601 #undef __FUNC__
1602 #define __FUNC__ "MatCreateMPIAIJ"
1603 /*@C
1604    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
1605    (the default parallel PETSc format).  For good matrix assembly performance
1606    the user should preallocate the matrix storage by setting the parameters
1607    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1608    performance can be increased by more than a factor of 50.
1609 
1610    Collective on MPI_Comm
1611 
1612    Input Parameters:
1613 +  comm - MPI communicator
1614 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1615            This value should be the same as the local size used in creating the
1616            y vector for the matrix-vector product y = Ax.
1617 .  n - This value should be the same as the local size used in creating the
1618        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
1619        calculated if N is given) For square matrices n is almost always m.
1620 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1621 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1622 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
1623            (same value is used for all local rows)
1624 .  d_nnz - array containing the number of nonzeros in the various rows of the
1625            DIAGONAL portion of the local submatrix (possibly different for each row)
1626            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1627            The size of this array is equal to the number of local rows, i.e 'm'.
1628            You must leave room for the diagonal entry even if it is zero.
1629 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
1630            submatrix (same value is used for all local rows).
1631 -  o_nnz - array containing the number of nonzeros in the various rows of the
1632            OFF-DIAGONAL portion of the local submatrix (possibly different for
1633            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
1634            structure. The size of this array is equal to the number
1635            of local rows, i.e 'm'.
1636 
1637    Output Parameter:
1638 .  A - the matrix
1639 
1640    Notes:
1641    m,n,M,N parameters specify the size of the matrix, and its partitioning across
1642    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
1643    storage requirements for this matrix.
1644 
1645    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
1646    processor than it must be used on all processors that share the object for
1647    that argument.
1648 
1649    The AIJ format (also called the Yale sparse matrix format or
1650    compressed row storage), is fully compatible with standard Fortran 77
1651    storage.  That is, the stored row and column indices can begin at
1652    either one (as in Fortran) or zero.  See the users manual for details.
1653 
1654    The user MUST specify either the local or global matrix dimensions
1655    (possibly both).
1656 
1657    The parallel matrix is partitioned such that the first m0 rows belong to
1658    process 0, the next m1 rows belong to process 1, the next m2 rows belong
1659    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
1660 
1661    The DIAGONAL portion of the local submatrix of a processor can be defined
1662    as the submatrix which is obtained by extraction the part corresponding
1663    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
1664    first row that belongs to the processor, and r2 is the last row belonging
1665    to the this processor. This is a square mxm matrix. The remaining portion
1666    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
1667 
1668    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
1669 
1670    By default, this format uses inodes (identical nodes) when possible.
1671    We search for consecutive rows with the same nonzero structure, thereby
1672    reusing matrix information to achieve increased efficiency.
1673 
1674    Options Database Keys:
1675 +  -mat_aij_no_inode  - Do not use inodes
1676 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
1677 -  -mat_aij_oneindex - Internally use indexing starting at 1
1678         rather than 0.  Note that when calling MatSetValues(),
1679         the user still MUST index entries starting at 0!
1680 .   -mat_mpi - use the parallel matrix data structures even on one processor
1681                (defaults to using SeqBAIJ format on one processor)
1682 .   -mat_mpi - use the parallel matrix data structures even on one processor
1683                (defaults to using SeqAIJ format on one processor)
1684 
1685 
1686    Example usage:
1687 
1688    Consider the following 8x8 matrix with 34 non-zero values, that is
1689    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
1690    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
1691    as follows:
1692 
1693 .vb
1694             1  2  0  |  0  3  0  |  0  4
1695     Proc0   0  5  6  |  7  0  0  |  8  0
1696             9  0 10  | 11  0  0  | 12  0
1697     -------------------------------------
1698            13  0 14  | 15 16 17  |  0  0
1699     Proc1   0 18  0  | 19 20 21  |  0  0
1700             0  0  0  | 22 23  0  | 24  0
1701     -------------------------------------
1702     Proc2  25 26 27  |  0  0 28  | 29  0
1703            30  0  0  | 31 32 33  |  0 34
1704 .ve
1705 
1706    This can be represented as a collection of submatrices as:
1707 
1708 .vb
1709       A B C
1710       D E F
1711       G H I
1712 .ve
1713 
1714    Where the submatrices A,B,C are owned by proc0, D,E,F are
1715    owned by proc1, G,H,I are owned by proc2.
1716 
1717    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1718    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
1719    The 'M','N' parameters are 8,8, and have the same values on all procs.
1720 
1721    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
1722    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
1723    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
1724    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
1725    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
1726    matrix, ans [DF] as another SeqAIJ matrix.
1727 
1728    When d_nz, o_nz parameters are specified, d_nz storage elements are
1729    allocated for every row of the local diagonal submatrix, and o_nz
1730    storage locations are allocated for every row of the OFF-DIAGONAL submat.
1731    One way to choose d_nz and o_nz is to use the max nonzerors per local
1732    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
1733    In this case, the values of d_nz,o_nz are:
1734 .vb
1735      proc0 : dnz = 2, o_nz = 2
1736      proc1 : dnz = 3, o_nz = 2
1737      proc2 : dnz = 1, o_nz = 4
1738 .ve
1739    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
1740    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
1741    for proc3. i.e we are using 12+15+10=37 storage locations to store
1742    34 values.
1743 
1744    When d_nnz, o_nnz parameters are specified, the storage is specified
1745    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
1746    In the above case the values for d_nnz,o_nnz are:
1747 .vb
1748      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
1749      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
1750      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
1751 .ve
1752    Here the space allocated is sum of all the above values i.e 34, and
1753    hence pre-allocation is perfect.
1754 
1755    Level: intermediate
1756 
1757 .keywords: matrix, aij, compressed row, sparse, parallel
1758 
1759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1760 @*/
1761 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1762 {
1763   Mat          B;
1764   Mat_MPIAIJ   *b;
1765   int          ierr, i,size,flag1 = 0, flag2 = 0;
1766 
1767   PetscFunctionBegin;
1768 
1769   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1770   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1771   if (d_nnz) {
1772     for (i=0; i<m; i++) {
1773       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1774     }
1775   }
1776   if (o_nnz) {
1777     for (i=0; i<m; i++) {
1778       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1779     }
1780   }
1781 
1782   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1783   ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
1784   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1785   if (!flag1 && !flag2 && size == 1) {
1786     if (M == PETSC_DECIDE) M = m;
1787     if (N == PETSC_DECIDE) N = n;
1788     ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1789     PetscFunctionReturn(0);
1790   }
1791 
1792   *A = 0;
1793   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
1794   PLogObjectCreate(B);
1795   B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
1796   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1797   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1798   B->ops->destroy = MatDestroy_MPIAIJ;
1799   B->ops->view    = MatView_MPIAIJ;
1800   B->factor       = 0;
1801   B->assembled    = PETSC_FALSE;
1802   B->mapping      = 0;
1803 
1804   B->insertmode      = NOT_SET_VALUES;
1805   b->size            = size;
1806   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1807 
1808   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1809     SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1810   }
1811 
1812   ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
1813   ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1814   b->m = m; B->m = m;
1815   b->n = n; B->n = n;
1816   b->N = N; B->N = N;
1817   b->M = M; B->M = M;
1818 
1819   /* the information in the maps duplicates the information computed below, eventually
1820      we should remove the duplicate information that is not contained in the maps */
1821   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
1822   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
1823 
1824   /* build local table of row and column ownerships */
1825   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1826   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1827   b->cowners = b->rowners + b->size + 2;
1828   ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1829   b->rowners[0] = 0;
1830   for ( i=2; i<=b->size; i++ ) {
1831     b->rowners[i] += b->rowners[i-1];
1832   }
1833   b->rstart = b->rowners[b->rank];
1834   b->rend   = b->rowners[b->rank+1];
1835   ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1836   b->cowners[0] = 0;
1837   for ( i=2; i<=b->size; i++ ) {
1838     b->cowners[i] += b->cowners[i-1];
1839   }
1840   b->cstart = b->cowners[b->rank];
1841   b->cend   = b->cowners[b->rank+1];
1842 
1843   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1844   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1845   PLogObjectParent(B,b->A);
1846   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1847   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1848   PLogObjectParent(B,b->B);
1849 
1850   /* build cache for off array entries formed */
1851   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
1852   b->donotstash  = 0;
1853   b->colmap      = 0;
1854   b->garray      = 0;
1855   b->roworiented = 1;
1856 
1857   /* stuff used for matrix vector multiply */
1858   b->lvec      = 0;
1859   b->Mvctx     = 0;
1860 
1861   /* stuff for MatGetRow() */
1862   b->rowindices   = 0;
1863   b->rowvalues    = 0;
1864   b->getrowactive = PETSC_FALSE;
1865 
1866   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1867                                      "MatStoreValues_MPIAIJ",
1868                                      (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1869   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1870                                      "MatRetrieveValues_MPIAIJ",
1871                                      (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1872   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
1873                                      "MatGetDiagonalBlock_MPIAIJ",
1874                                      (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1875   *A = B;
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 #undef __FUNC__
1880 #define __FUNC__ "MatDuplicate_MPIAIJ"
1881 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1882 {
1883   Mat        mat;
1884   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
1885   int        ierr, len=0, flg;
1886 
1887   PetscFunctionBegin;
1888   *newmat       = 0;
1889   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
1890   PLogObjectCreate(mat);
1891   mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
1892   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1893   mat->ops->destroy = MatDestroy_MPIAIJ;
1894   mat->ops->view    = MatView_MPIAIJ;
1895   mat->factor       = matin->factor;
1896   mat->assembled    = PETSC_TRUE;
1897   mat->insertmode   = NOT_SET_VALUES;
1898 
1899   a->m = mat->m   = oldmat->m;
1900   a->n = mat->n   = oldmat->n;
1901   a->M = mat->M   = oldmat->M;
1902   a->N = mat->N   = oldmat->N;
1903 
1904   a->rstart       = oldmat->rstart;
1905   a->rend         = oldmat->rend;
1906   a->cstart       = oldmat->cstart;
1907   a->cend         = oldmat->cend;
1908   a->size         = oldmat->size;
1909   a->rank         = oldmat->rank;
1910   a->donotstash   = oldmat->donotstash;
1911   a->roworiented  = oldmat->roworiented;
1912   a->rowindices   = 0;
1913   a->rowvalues    = 0;
1914   a->getrowactive = PETSC_FALSE;
1915 
1916   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
1917   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1918   a->cowners = a->rowners + a->size + 2;
1919   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1920   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1921   if (oldmat->colmap) {
1922 #if defined (PETSC_USE_CTABLE)
1923     ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1924 #else
1925     a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
1926     PLogObjectMemory(mat,(a->N)*sizeof(int));
1927     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
1928 #endif
1929   } else a->colmap = 0;
1930   if (oldmat->garray) {
1931     len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
1932     a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
1933     PLogObjectMemory(mat,len*sizeof(int));
1934     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1935   } else a->garray = 0;
1936 
1937   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1938   PLogObjectParent(mat,a->lvec);
1939   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1940   PLogObjectParent(mat,a->Mvctx);
1941   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1942   PLogObjectParent(mat,a->A);
1943   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1944   PLogObjectParent(mat,a->B);
1945   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
1946   if (flg) {
1947     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
1948   }
1949   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1950   *newmat = mat;
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 #include "sys.h"
1955 
1956 #undef __FUNC__
1957 #define __FUNC__ "MatLoad_MPIAIJ"
1958 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
1959 {
1960   Mat          A;
1961   Scalar       *vals,*svals;
1962   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1963   MPI_Status   status;
1964   int          i, nz, ierr, j,rstart, rend, fd;
1965   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1966   int          *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
1967   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1968 
1969   PetscFunctionBegin;
1970   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1971   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1972   if (!rank) {
1973     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1974     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1975     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
1976     if (header[3] < 0) {
1977       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
1978     }
1979   }
1980 
1981   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1982   M = header[1]; N = header[2];
1983   /* determine ownership of all rows */
1984   m = M/size + ((M % size) > rank);
1985   rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
1986   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1987   rowners[0] = 0;
1988   for ( i=2; i<=size; i++ ) {
1989     rowners[i] += rowners[i-1];
1990   }
1991   rstart = rowners[rank];
1992   rend   = rowners[rank+1];
1993 
1994   /* distribute row lengths to all processors */
1995   ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
1996   offlens = ourlens + (rend-rstart);
1997   if (!rank) {
1998     rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
1999     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2000     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2001     for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
2002     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2003     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2004   } else {
2005     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
2006   }
2007 
2008   if (!rank) {
2009     /* calculate the number of nonzeros on each processor */
2010     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2011     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2012     for ( i=0; i<size; i++ ) {
2013       for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
2014         procsnz[i] += rowlengths[j];
2015       }
2016     }
2017     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2018 
2019     /* determine max buffer needed and allocate it */
2020     maxnz = 0;
2021     for ( i=0; i<size; i++ ) {
2022       maxnz = PetscMax(maxnz,procsnz[i]);
2023     }
2024     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2025 
2026     /* read in my part of the matrix column indices  */
2027     nz     = procsnz[0];
2028     mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2029     ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2030 
2031     /* read in every one elses and ship off */
2032     for ( i=1; i<size; i++ ) {
2033       nz   = procsnz[i];
2034       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2035       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2036     }
2037     ierr = PetscFree(cols);CHKERRQ(ierr);
2038   } else {
2039     /* determine buffer space needed for message */
2040     nz = 0;
2041     for ( i=0; i<m; i++ ) {
2042       nz += ourlens[i];
2043     }
2044     mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
2045 
2046     /* receive message of column indices*/
2047     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2048     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2049     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2050   }
2051 
2052   /* determine column ownership if matrix is not square */
2053   if (N != M) {
2054     n      = N/size + ((N % size) > rank);
2055     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2056     cstart = cend - n;
2057   } else {
2058     cstart = rstart;
2059     cend   = rend;
2060     n      = cend - cstart;
2061   }
2062 
2063   /* loop over local rows, determining number of off diagonal entries */
2064   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2065   jj = 0;
2066   for ( i=0; i<m; i++ ) {
2067     for ( j=0; j<ourlens[i]; j++ ) {
2068       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2069       jj++;
2070     }
2071   }
2072 
2073   /* create our matrix */
2074   for ( i=0; i<m; i++ ) {
2075     ourlens[i] -= offlens[i];
2076   }
2077   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2078   A = *newmat;
2079   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2080   for ( i=0; i<m; i++ ) {
2081     ourlens[i] += offlens[i];
2082   }
2083 
2084   if (!rank) {
2085     vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);
2086 
2087     /* read in my part of the matrix numerical values  */
2088     nz   = procsnz[0];
2089     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2090 
2091     /* insert into matrix */
2092     jj      = rstart;
2093     smycols = mycols;
2094     svals   = vals;
2095     for ( i=0; i<m; i++ ) {
2096       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2097       smycols += ourlens[i];
2098       svals   += ourlens[i];
2099       jj++;
2100     }
2101 
2102     /* read in other processors and ship out */
2103     for ( i=1; i<size; i++ ) {
2104       nz   = procsnz[i];
2105       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2106       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2107     }
2108     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2109   } else {
2110     /* receive numeric values */
2111     vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);
2112 
2113     /* receive message of values*/
2114     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2115     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2116     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2117 
2118     /* insert into matrix */
2119     jj      = rstart;
2120     smycols = mycols;
2121     svals   = vals;
2122     for ( i=0; i<m; i++ ) {
2123       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2124       smycols += ourlens[i];
2125       svals   += ourlens[i];
2126       jj++;
2127     }
2128   }
2129   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2130   ierr = PetscFree(vals);CHKERRQ(ierr);
2131   ierr = PetscFree(mycols);CHKERRQ(ierr);
2132   ierr = PetscFree(rowners);CHKERRQ(ierr);
2133 
2134   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 #undef __FUNC__
2140 #define __FUNC__ "MatGetSubMatrix_MPIAIJ"
2141 /*
2142     Not great since it makes two copies of the submatrix, first an SeqAIJ
2143   in local and then by concatenating the local matrices the end result.
2144   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2145 */
2146 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2147 {
2148   int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2149   Mat        *local,M, Mreuse;
2150   Scalar     *vwork,*aa;
2151   MPI_Comm   comm = mat->comm;
2152   Mat_SeqAIJ *aij;
2153   int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;
2154 
2155   PetscFunctionBegin;
2156   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2157   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2158 
2159   if (call ==  MAT_REUSE_MATRIX) {
2160     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2161     if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
2162     local = &Mreuse;
2163     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2164   } else {
2165     ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2166     Mreuse = *local;
2167     ierr = PetscFree(local);CHKERRQ(ierr);
2168   }
2169 
2170   /*
2171       m - number of local rows
2172       n - number of columns (same on all processors)
2173       rstart - first row in new global matrix generated
2174   */
2175   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2176   if (call == MAT_INITIAL_MATRIX) {
2177     aij = (Mat_SeqAIJ *) (Mreuse)->data;
2178     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2179     ii  = aij->i;
2180     jj  = aij->j;
2181 
2182     /*
2183         Determine the number of non-zeros in the diagonal and off-diagonal
2184         portions of the matrix in order to do correct preallocation
2185     */
2186 
2187     /* first get start and end of "diagonal" columns */
2188     if (csize == PETSC_DECIDE) {
2189       nlocal = n/size + ((n % size) > rank);
2190     } else {
2191       nlocal = csize;
2192     }
2193     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2194     rstart = rend - nlocal;
2195     if (rank == size - 1 && rend != n) {
2196       SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
2197     }
2198 
2199     /* next, compute all the lengths */
2200     dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
2201     olens = dlens + m;
2202     for ( i=0; i<m; i++ ) {
2203       jend = ii[i+1] - ii[i];
2204       olen = 0;
2205       dlen = 0;
2206       for ( j=0; j<jend; j++ ) {
2207         if ( *jj < rstart || *jj >= rend) olen++;
2208         else dlen++;
2209         jj++;
2210       }
2211       olens[i] = olen;
2212       dlens[i] = dlen;
2213     }
2214     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2215     ierr = PetscFree(dlens);CHKERRQ(ierr);
2216   } else {
2217     int ml,nl;
2218 
2219     M = *newmat;
2220     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2221     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
2222     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2223     /*
2224          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2225        rather than the slower MatSetValues().
2226     */
2227     M->was_assembled = PETSC_TRUE;
2228     M->assembled     = PETSC_FALSE;
2229   }
2230   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2231   aij = (Mat_SeqAIJ *) (Mreuse)->data;
2232   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
2233   ii  = aij->i;
2234   jj  = aij->j;
2235   aa  = aij->a;
2236   for (i=0; i<m; i++) {
2237     row   = rstart + i;
2238     nz    = ii[i+1] - ii[i];
2239     cwork = jj;     jj += nz;
2240     vwork = aa;     aa += nz;
2241     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2242   }
2243 
2244   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2245   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2246   *newmat = M;
2247 
2248   /* save submatrix used in processor for next request */
2249   if (call ==  MAT_INITIAL_MATRIX) {
2250     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2251     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2252   }
2253 
2254   PetscFunctionReturn(0);
2255 }
2256