xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision be9f9ed3dfd084f007435b442bc709b44f0f745d)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 /*
8   Local utility routine that creates a mapping from the global column
9 number to the local number in the off-diagonal part of the local
10 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable,
11 at a slightly higher hash-table lookup cost; without it, it is not scalable
12 (each processor holds an order-N integer array) but access is fast.
13 */
14 #undef __FUNCT__
15 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
19   int        n = aij->B->n,i,ierr;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
29   PetscLogObjectMemory(mat,mat->N*sizeof(int));
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
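/*
   Illustration only (a hedged sketch, not compiled): how the colmap built above is
   later consulted to translate a global column index gcol into the local column
   index lcol of the off-diagonal block B.  gcol and lcol are hypothetical names;
   the real lookups appear in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.
   Entries are stored shifted by +1, so a result of -1 after shifting back means the
   column is not present in B on this process.

     int lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) handle a column that is not in the off-diagonal part
*/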
35 
36 #define CHUNKSIZE   15
37 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
38 { \
39  \
40     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
41     rmax = aimax[row]; nrow = ailen[row];  \
42     col1 = col - shift; \
43      \
44     low = 0; high = nrow; \
45     while (high-low > 5) { \
46       t = (low+high)/2; \
47       if (rp[t] > col) high = t; \
48       else             low  = t; \
49     } \
50       for (_i=low; _i<high; _i++) { \
51         if (rp[_i] > col1) break; \
52         if (rp[_i] == col1) { \
53           if (addv == ADD_VALUES) ap[_i] += value;   \
54           else                  ap[_i] = value; \
55           goto a_noinsert; \
56         } \
57       }  \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (int*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(int));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                  ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (nonew == 1) goto b_noinsert; \
133       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
134       if (nrow >= rmax) { \
135         /* there is no extra room in row, therefore enlarge */ \
136         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
137         PetscScalar *new_a; \
138  \
139         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
140  \
141         /* malloc new storage space */ \
142         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
143         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
144         new_j   = (int*)(new_a + new_nz); \
145         new_i   = new_j + new_nz; \
146  \
147         /* copy over old data into new slots */ \
148         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
149         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
150         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
151         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
152         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
153                                                            len*sizeof(int));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
156                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
157         /* free up old matrix storage */ \
158  \
159         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
160         if (!b->singlemalloc) { \
161           ierr = PetscFree(b->i);CHKERRQ(ierr); \
162           ierr = PetscFree(b->j);CHKERRQ(ierr); \
163         } \
164         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
165         b->singlemalloc = PETSC_TRUE; \
166  \
167         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
168         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
169         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
170         b->maxnz += CHUNKSIZE; \
171         b->reallocs++; \
172       } \
173       N = nrow++ - 1; b->nz++; \
174       /* shift up all the later entries in this row */ \
175       for (ii=N; ii>=_i; ii--) { \
176         rp[ii+1] = rp[ii]; \
177         ap[ii+1] = ap[ii]; \
178       } \
179       rp[_i] = col1;  \
180       ap[_i] = value;  \
181       b_noinsert: ; \
182       bilen[row] = nrow; \
183 }
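/*
   Hedged outline (illustration only) of the common fast path the two macros above
   take for one (row,col,value) pair, assuming the row still has spare room; the
   names mirror the macro-local variables:

     for (_i=0; _i<nrow; _i++) if (rp[_i] >= col) break;
     if (_i < nrow && rp[_i] == col) {
       if (addv == ADD_VALUES) ap[_i] += value; else ap[_i] = value;
     } else {
       for (ii=nrow-1; ii>=_i; ii--) { rp[ii+1] = rp[ii]; ap[ii+1] = ap[ii]; }
       rp[_i] = col; ap[_i] = value; nrow++;
     }

   When the row is full (nrow >= rmax) the macros instead reallocate the whole
   CSR storage with CHUNKSIZE extra slots spliced in after this row.
*/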
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIAIJ"
187 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
190   PetscScalar  value;
191   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some variables required by the macros above */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
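/*
   Hedged usage sketch for the routine above: values for locally owned rows go
   directly into the diagonal (A) or off-diagonal (B) block; rows owned by other
   processes are stashed and communicated during assembly.  C, i, j and v below
   are hypothetical.

     ierr = MatSetValues(C,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/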
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
277   int        cstart = aij->cstart,cend = aij->cend,row,col;
278 
279   PetscFunctionBegin;
280   for (i=0; i<m; i++) {
281     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
282     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
283     if (idxm[i] >= rstart && idxm[i] < rend) {
284       row = idxm[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
287         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
288         if (idxn[j] >= cstart && idxn[j] < cend){
289           col = idxn[j] - cstart;
290           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
291         } else {
292           if (!aij->colmap) {
293             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
294           }
295 #if defined (PETSC_USE_CTABLE)
296           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
297           col --;
298 #else
299           col = aij->colmap[idxn[j]] - 1;
300 #endif
301           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
302           else {
303             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
304           }
305         }
306       }
307     } else {
308       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
309     }
310   }
311   PetscFunctionReturn(0);
312 }
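/*
   Hedged usage sketch: unlike MatSetValues(), MatGetValues() on an MPIAIJ matrix
   may only request rows owned by the calling process (see the error above).
   C, row, col and val are hypothetical.

     int         row,col;
     PetscScalar val;
     set row to a locally owned global row, col to any global column, then
     ierr = MatGetValues(C,1,&row,1,&col,&val);CHKERRQ(ierr);
*/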
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
316 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
317 {
318   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
319   int         ierr,nstash,reallocs;
320   InsertMode  addv;
321 
322   PetscFunctionBegin;
323   if (aij->donotstash) {
324     PetscFunctionReturn(0);
325   }
326 
327   /* make sure all processors are either in INSERTMODE or ADDMODE */
328   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
329   if (addv == (ADD_VALUES|INSERT_VALUES)) {
330     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
331   }
332   mat->insertmode = addv; /* in case this processor had no cache */
333 
334   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
335   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
336   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
337   PetscFunctionReturn(0);
338 }
339 
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
343 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
344 {
345   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
346   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
347   int         i,j,rstart,ncols,n,ierr,flg;
348   int         *row,*col,other_disassembled;
349   PetscScalar *val;
350   InsertMode  addv = mat->insertmode;
351 
352   PetscFunctionBegin;
353   if (!aij->donotstash) {
354     while (1) {
355       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
356       if (!flg) break;
357 
358       for (i=0; i<n;) {
359         /* Now identify the consecutive vals belonging to the same row */
360         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
361         if (j < n) ncols = j-i;
362         else       ncols = n-i;
363         /* Now assemble all these values with a single function call */
364         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
365         i = j;
366       }
367     }
368     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
369   }
370 
371   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
372   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
373 
374   /* determine if any processor has disassembled; if so we must
375      also disassemble ourselves, in order that we may reassemble. */
376   /*
377      if the nonzero structure of submatrix B cannot change then we know that
378      no processor disassembled, so we can skip this step
379   */
380   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
381     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
382     if (mat->was_assembled && !other_disassembled) {
383       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
384     }
385   }
386 
387   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
388     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
389   }
390   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
391   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
392 
393   if (aij->rowvalues) {
394     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
395     aij->rowvalues = 0;
396   }
397 
398   /* used by MatAXPY() */
399   a->xtoy = 0; b->xtoy = 0;
400   a->XtoY = 0; b->XtoY = 0;
401 
402   PetscFunctionReturn(0);
403 }
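/*
   Hedged usage sketch: switching between INSERT_VALUES and ADD_VALUES requires an
   intermediate flush assembly, since MatAssemblyBegin_MPIAIJ() above checks that
   all processes use the same insert mode.  C, i, j and v are hypothetical.

     ierr = MatSetValues(C,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(C,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetValues(C,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/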
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
407 int MatZeroEntries_MPIAIJ(Mat A)
408 {
409   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
410   int        ierr;
411 
412   PetscFunctionBegin;
413   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
414   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroRows_MPIAIJ"
420 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
421 {
422   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
423   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
424   int            *nprocs,j,idx,nsends,row;
425   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
426   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
427   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
428   MPI_Comm       comm = A->comm;
429   MPI_Request    *send_waits,*recv_waits;
430   MPI_Status     recv_status,*send_status;
431   IS             istmp;
432   PetscTruth     found;
433 
434   PetscFunctionBegin;
435   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
436   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
437 
438   /*  first count number of contributors to each processor */
439   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
440   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
441   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
442   for (i=0; i<N; i++) {
443     idx = rows[i];
444     found = PETSC_FALSE;
445     for (j=0; j<size; j++) {
446       if (idx >= owners[j] && idx < owners[j+1]) {
447         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
448       }
449     }
450     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
451   }
452   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
453 
454   /* inform other processors of number of messages and max length*/
455   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
456 
457   /* post receives:   */
458   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
459   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
460   for (i=0; i<nrecvs; i++) {
461     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
462   }
463 
464   /* do sends:
465       1) starts[i] gives the starting index in svalues for stuff going to
466          the ith processor
467   */
468   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
469   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
470   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
471   starts[0] = 0;
472   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
473   for (i=0; i<N; i++) {
474     svalues[starts[owner[i]]++] = rows[i];
475   }
476   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
477 
478   starts[0] = 0;
479   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
480   count = 0;
481   for (i=0; i<size; i++) {
482     if (nprocs[2*i+1]) {
483       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
484     }
485   }
486   ierr = PetscFree(starts);CHKERRQ(ierr);
487 
488   base = owners[rank];
489 
490   /*  wait on receives */
491   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
492   source = lens + nrecvs;
493   count  = nrecvs; slen = 0;
494   while (count) {
495     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
496     /* unpack receives into our local space */
497     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
498     source[imdex]  = recv_status.MPI_SOURCE;
499     lens[imdex]    = n;
500     slen          += n;
501     count--;
502   }
503   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
504 
505   /* move the data into the send scatter */
506   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
507   count = 0;
508   for (i=0; i<nrecvs; i++) {
509     values = rvalues + i*nmax;
510     for (j=0; j<lens[i]; j++) {
511       lrows[count++] = values[j] - base;
512     }
513   }
514   ierr = PetscFree(rvalues);CHKERRQ(ierr);
515   ierr = PetscFree(lens);CHKERRQ(ierr);
516   ierr = PetscFree(owner);CHKERRQ(ierr);
517   ierr = PetscFree(nprocs);CHKERRQ(ierr);
518 
519   /* actually zap the local rows */
520   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
521   PetscLogObjectParent(A,istmp);
522 
523   /*
524         Zero the required rows. If the "diagonal block" of the matrix
525      is square and the user wishes to set the diagonal we use separate
526      code so that MatSetValues() is not called for each diagonal entry,
527      which would allocate new memory, causing many mallocs and slowing things down.
528 
529        Contributed by: Mathew Knepley
530   */
531   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
532   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
533   if (diag && (l->A->M == l->A->N)) {
534     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
535   } else if (diag) {
536     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
537     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
538       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
539 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
540     }
541     for (i = 0; i < slen; i++) {
542       row  = lrows[i] + rstart;
543       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
544     }
545     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547   } else {
548     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
549   }
550   ierr = ISDestroy(istmp);CHKERRQ(ierr);
551   ierr = PetscFree(lrows);CHKERRQ(ierr);
552 
553   /* wait on sends */
554   if (nsends) {
555     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
556     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
557     ierr = PetscFree(send_status);CHKERRQ(ierr);
558   }
559   ierr = PetscFree(send_waits);CHKERRQ(ierr);
560   ierr = PetscFree(svalues);CHKERRQ(ierr);
561 
562   PetscFunctionReturn(0);
563 }
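/*
   Hedged usage sketch for the routine above: each process lists the global rows it
   wants zeroed in an IS (the rows need not be locally owned; they are routed to
   their owners).  nrows, rows and the diagonal value below are hypothetical.

     IS          is;
     PetscScalar one = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/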
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatMult_MPIAIJ"
567 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
568 {
569   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
570   int        ierr,nt;
571 
572   PetscFunctionBegin;
573   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
574   if (nt != A->n) {
575     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
576   }
577   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
578   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
579   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
580   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
581   PetscFunctionReturn(0);
582 }
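/*
   Schematic of the multiply above (a sketch): with x_local the owned part of xx
   and x_ghost = a->lvec the gathered off-process entries,

     yy  = A_diag * x_local        computed while the scatter is still in flight
     yy += B_off  * x_ghost        after VecScatterEnd() delivers x_ghost

   so communication of the ghost values overlaps the local multiply.
*/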
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatMultAdd_MPIAIJ"
586 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
587 {
588   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
589   int        ierr;
590 
591   PetscFunctionBegin;
592   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
594   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
595   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
601 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
602 {
603   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
604   int        ierr;
605 
606   PetscFunctionBegin;
607   /* do nondiagonal part */
608   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
609   /* send it on its way */
610   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
611   /* do local part */
612   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
613   /* receive remote parts: note this assumes the values are not actually */
614   /* inserted in yy until the next line, which is true for my implementation */
615   /* but may not always be true. */
616   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 EXTERN_C_BEGIN
621 #undef __FUNCT__
622 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
623 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
624 {
625   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
626   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
627   IS         Me,Notme;
628   int        M,N,first,last,*notme,i, ierr;
629 
630   PetscFunctionBegin;
631 
632   /* Easy test: symmetric diagonal block */
633   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
634   ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
635   if (!*f) PetscFunctionReturn(0);
636 
637   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
638   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
639   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
640   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
641   for (i=0; i<first; i++) notme[i] = i;
642   for (i=last; i<M; i++) notme[i-last+first] = i;
643   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
644   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
645   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
646   Aoff = Aoffs[0];
647   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
648   Boff = Boffs[0];
649   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
650   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
651   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
652   ierr = ISDestroy(Me); CHKERRQ(ierr);
653   ierr = ISDestroy(Notme); CHKERRQ(ierr);
654 
655   PetscFunctionReturn(0);
656 }
657 EXTERN_C_END
658 
659 #undef __FUNCT__
660 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
661 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
662 {
663   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
664   int        ierr;
665 
666   PetscFunctionBegin;
667   /* do nondiagonal part */
668   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
669   /* send it on its way */
670   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
671   /* do local part */
672   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
673   /* receive remote parts: note this assumes the values are not actually */
674   /* inserted in zz until the next line, which is true for my implementation */
675   /* but may not always be true. */
676   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 /*
681   This only works correctly for square matrices where the subblock A->A is the
682    diagonal block
683 */
684 #undef __FUNCT__
685 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
686 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
687 {
688   int        ierr;
689   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
690 
691   PetscFunctionBegin;
692   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
693   if (a->rstart != a->cstart || a->rend != a->cend) {
694     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
695   }
696   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
697   PetscFunctionReturn(0);
698 }
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatScale_MPIAIJ"
702 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
703 {
704   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
705   int        ierr;
706 
707   PetscFunctionBegin;
708   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
709   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "MatDestroy_MPIAIJ"
715 int MatDestroy_MPIAIJ(Mat mat)
716 {
717   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
718   int        ierr;
719 
720   PetscFunctionBegin;
721 #if defined(PETSC_USE_LOG)
722   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
723 #endif
724   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
725   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
726   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
727   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
728 #if defined (PETSC_USE_CTABLE)
729   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
730 #else
731   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
732 #endif
733   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
734   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
735   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
736   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
737   ierr = PetscFree(aij);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPIAIJ_Binary"
743 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
744 {
745   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
746   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
747   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
748   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
749   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
750   PetscScalar       *column_values;
751 
752   PetscFunctionBegin;
753   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
754   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
755   nz   = A->nz + B->nz;
756   if (rank == 0) {
757     header[0] = MAT_FILE_COOKIE;
758     header[1] = mat->M;
759     header[2] = mat->N;
760     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
761     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
762     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
763     /* get largest number of rows any processor has */
764     rlen = mat->m;
765     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
766     for (i=1; i<size; i++) {
767       rlen = PetscMax(rlen,range[i+1] - range[i]);
768     }
769   } else {
770     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
771     rlen = mat->m;
772   }
773 
774   /* load up the local row counts */
775   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
776   for (i=0; i<mat->m; i++) {
777     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
778   }
779 
780   /* store the row lengths to the file */
781   if (rank == 0) {
782     MPI_Status status;
783     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
784     for (i=1; i<size; i++) {
785       rlen = range[i+1] - range[i];
786       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
787       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
788     }
789   } else {
790     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
791   }
792   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
793 
794   /* load up the local column indices */
795   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
796   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
797   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
798   cnt  = 0;
799   for (i=0; i<mat->m; i++) {
800     for (j=B->i[i]; j<B->i[i+1]; j++) {
801       if ( (col = garray[B->j[j]]) > cstart) break;
802       column_indices[cnt++] = col;
803     }
804     for (k=A->i[i]; k<A->i[i+1]; k++) {
805       column_indices[cnt++] = A->j[k] + cstart;
806     }
807     for (; j<B->i[i+1]; j++) {
808       column_indices[cnt++] = garray[B->j[j]];
809     }
810   }
811   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
812 
813   /* store the column indices to the file */
814   if (rank == 0) {
815     MPI_Status status;
816     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
817     for (i=1; i<size; i++) {
818       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
819       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
820       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
822     }
823   } else {
824     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
825     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
826   }
827   ierr = PetscFree(column_indices);CHKERRQ(ierr);
828 
829   /* load up the local column values */
830   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
831   cnt  = 0;
832   for (i=0; i<mat->m; i++) {
833     for (j=B->i[i]; j<B->i[i+1]; j++) {
834       if ( garray[B->j[j]] > cstart) break;
835       column_values[cnt++] = B->a[j];
836     }
837     for (k=A->i[i]; k<A->i[i+1]; k++) {
838       column_values[cnt++] = A->a[k];
839     }
840     for (; j<B->i[i+1]; j++) {
841       column_values[cnt++] = B->a[j];
842     }
843   }
844   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
845 
846   /* store the column values to the file */
847   if (rank == 0) {
848     MPI_Status status;
849     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
850     for (i=1; i<size; i++) {
851       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
852       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
853       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
855     }
856   } else {
857     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
858     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(column_values);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
866 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
867 {
868   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
869   int               ierr,rank = aij->rank,size = aij->size;
870   PetscTruth        isdraw,isascii,flg,isbinary;
871   PetscViewer       sviewer;
872   PetscViewerFormat format;
873 
874   PetscFunctionBegin;
875   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
876   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
878   if (isascii) {
879     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
880     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
881       MatInfo info;
882       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
883       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
884       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
885       if (flg) {
886         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
887 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
888       } else {
889         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
890 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
891       }
892       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
893       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
894       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
895       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
896       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
897       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
898       PetscFunctionReturn(0);
899     } else if (format == PETSC_VIEWER_ASCII_INFO) {
900       PetscFunctionReturn(0);
901     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
902       PetscFunctionReturn(0);
903     }
904   } else if (isbinary) {
905     if (size == 1) {
906       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
907       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
908     } else {
909       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
910     }
911     PetscFunctionReturn(0);
912   } else if (isdraw) {
913     PetscDraw  draw;
914     PetscTruth isnull;
915     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
916     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
917   }
918 
919   if (size == 1) {
920     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
921     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
922   } else {
923     /* assemble the entire matrix onto first processor. */
924     Mat         A;
925     Mat_SeqAIJ *Aloc;
926     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
927     PetscScalar *a;
928 
929     if (!rank) {
930       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
931     } else {
932       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
933     }
934     PetscLogObjectParent(mat,A);
935 
936     /* copy over the A part */
937     Aloc = (Mat_SeqAIJ*)aij->A->data;
938     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
939     row = aij->rstart;
940     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
941     for (i=0; i<m; i++) {
942       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
943       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
944     }
945     aj = Aloc->j;
946     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
947 
948     /* copy over the B part */
949     Aloc = (Mat_SeqAIJ*)aij->B->data;
950     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
951     row  = aij->rstart;
952     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
953     ct   = cols;
954     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
955     for (i=0; i<m; i++) {
956       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
957       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
958     }
959     ierr = PetscFree(ct);CHKERRQ(ierr);
960     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
961     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
962     /*
963        Every process has to make this call to draw the matrix since the graphics waits are
964        synchronized across all processors that share the PetscDraw object
965     */
966     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
967     if (!rank) {
968       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
969       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
970     }
971     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
972     ierr = MatDestroy(A);CHKERRQ(ierr);
973   }
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatView_MPIAIJ"
979 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
980 {
981   int        ierr;
982   PetscTruth isascii,isdraw,issocket,isbinary;
983 
984   PetscFunctionBegin;
985   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
986   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
987   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
988   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
989   if (isascii || isdraw || isbinary || issocket) {
990     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
991   } else {
992     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
993   }
994   PetscFunctionReturn(0);
995 }
996 
997 
998 
999 #undef __FUNCT__
1000 #define __FUNCT__ "MatRelax_MPIAIJ"
1001 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1002 {
1003   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1004   int          ierr;
1005   Vec          bb1;
1006   PetscScalar  mone=-1.0;
1007 
1008   PetscFunctionBegin;
1009   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1010 
1011   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1012 
1013   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1014     if (flag & SOR_ZERO_INITIAL_GUESS) {
1015       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1016       its--;
1017     }
1018 
1019     while (its--) {
1020       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1021       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1022 
1023       /* update rhs: bb1 = bb - B*x */
1024       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1025       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1026 
1027       /* local sweep */
1028       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1029       CHKERRQ(ierr);
1030     }
1031   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1032     if (flag & SOR_ZERO_INITIAL_GUESS) {
1033       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1034       its--;
1035     }
1036     while (its--) {
1037       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1038       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1039 
1040       /* update rhs: bb1 = bb - B*x */
1041       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1042       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1043 
1044       /* local sweep */
1045       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1046       CHKERRQ(ierr);
1047     }
1048   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1049     if (flag & SOR_ZERO_INITIAL_GUESS) {
1050       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1051       its--;
1052     }
1053     while (its--) {
1054       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1055       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1056 
1057       /* update rhs: bb1 = bb - B*x */
1058       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1059       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1060 
1061       /* local sweep */
1062       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1063       CHKERRQ(ierr);
1064     }
1065   } else {
1066     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1067   }
1068 
1069   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1070   PetscFunctionReturn(0);
1071 }
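/*
   Sketch of one outer iteration of the local relaxation above: with
   x_ghost = mat->lvec gathered from the current xx,

     bb1 = bb - B_off * x_ghost      done as lvec <- -lvec, then a MultAdd with bb
     xx  <- local SOR sweep(s) of A_diag applied to bb1

   i.e. the off-process coupling is handled in block-Jacobi fashion while the
   sweeps themselves run only on the diagonal block.
*/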
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1075 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1076 {
1077   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1078   Mat        A = mat->A,B = mat->B;
1079   int        ierr;
1080   PetscReal  isend[5],irecv[5];
1081 
1082   PetscFunctionBegin;
1083   info->block_size     = 1.0;
1084   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1085   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1086   isend[3] = info->memory;  isend[4] = info->mallocs;
1087   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1088   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1089   isend[3] += info->memory;  isend[4] += info->mallocs;
1090   if (flag == MAT_LOCAL) {
1091     info->nz_used      = isend[0];
1092     info->nz_allocated = isend[1];
1093     info->nz_unneeded  = isend[2];
1094     info->memory       = isend[3];
1095     info->mallocs      = isend[4];
1096   } else if (flag == MAT_GLOBAL_MAX) {
1097     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1098     info->nz_used      = irecv[0];
1099     info->nz_allocated = irecv[1];
1100     info->nz_unneeded  = irecv[2];
1101     info->memory       = irecv[3];
1102     info->mallocs      = irecv[4];
1103   } else if (flag == MAT_GLOBAL_SUM) {
1104     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1105     info->nz_used      = irecv[0];
1106     info->nz_allocated = irecv[1];
1107     info->nz_unneeded  = irecv[2];
1108     info->memory       = irecv[3];
1109     info->mallocs      = irecv[4];
1110   }
1111   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1112   info->fill_ratio_needed = 0;
1113   info->factor_mallocs    = 0;
1114   info->rows_global       = (double)matin->M;
1115   info->columns_global    = (double)matin->N;
1116   info->rows_local        = (double)matin->m;
1117   info->columns_local     = (double)matin->N;
1118 
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatSetOption_MPIAIJ"
1124 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1125 {
1126   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1127   int        ierr;
1128 
1129   PetscFunctionBegin;
1130   switch (op) {
1131   case MAT_NO_NEW_NONZERO_LOCATIONS:
1132   case MAT_YES_NEW_NONZERO_LOCATIONS:
1133   case MAT_COLUMNS_UNSORTED:
1134   case MAT_COLUMNS_SORTED:
1135   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1136   case MAT_KEEP_ZEROED_ROWS:
1137   case MAT_NEW_NONZERO_LOCATION_ERR:
1138   case MAT_USE_INODES:
1139   case MAT_DO_NOT_USE_INODES:
1140   case MAT_IGNORE_ZERO_ENTRIES:
1141     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1142     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1143     break;
1144   case MAT_ROW_ORIENTED:
1145     a->roworiented = PETSC_TRUE;
1146     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1147     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1148     break;
1149   case MAT_ROWS_SORTED:
1150   case MAT_ROWS_UNSORTED:
1151   case MAT_YES_NEW_DIAGONALS:
1152     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1153     break;
1154   case MAT_COLUMN_ORIENTED:
1155     a->roworiented = PETSC_FALSE;
1156     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1157     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1158     break;
1159   case MAT_IGNORE_OFF_PROC_ENTRIES:
1160     a->donotstash = PETSC_TRUE;
1161     break;
1162   case MAT_NO_NEW_DIAGONALS:
1163     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1164   default:
1165     SETERRQ(PETSC_ERR_SUP,"unknown option");
1166   }
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatGetRow_MPIAIJ"
1172 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1173 {
1174   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1175   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1176   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1177   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1178   int          *cmap,*idx_p;
1179 
1180   PetscFunctionBegin;
1181   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1182   mat->getrowactive = PETSC_TRUE;
1183 
1184   if (!mat->rowvalues && (idx || v)) {
1185     /*
1186         allocate enough space to hold information from the longest row.
1187     */
1188     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1189     int     max = 1,tmp;
1190     for (i=0; i<matin->m; i++) {
1191       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1192       if (max < tmp) { max = tmp; }
1193     }
1194     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1195     mat->rowindices = (int*)(mat->rowvalues + max);
1196   }
1197 
1198   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1199   lrow = row - rstart;
1200 
1201   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1202   if (!v)   {pvA = 0; pvB = 0;}
1203   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1204   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1205   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1206   nztot = nzA + nzB;
1207 
1208   cmap  = mat->garray;
1209   if (v  || idx) {
1210     if (nztot) {
1211       /* Sort by increasing column numbers, assuming A and B already sorted */
1212       int imark = -1;
1213       if (v) {
1214         *v = v_p = mat->rowvalues;
1215         for (i=0; i<nzB; i++) {
1216           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1217           else break;
1218         }
1219         imark = i;
1220         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1221         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1222       }
1223       if (idx) {
1224         *idx = idx_p = mat->rowindices;
1225         if (imark > -1) {
1226           for (i=0; i<imark; i++) {
1227             idx_p[i] = cmap[cworkB[i]];
1228           }
1229         } else {
1230           for (i=0; i<nzB; i++) {
1231             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1232             else break;
1233           }
1234           imark = i;
1235         }
1236         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1237         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1238       }
1239     } else {
1240       if (idx) *idx = 0;
1241       if (v)   *v   = 0;
1242     }
1243   }
1244   *nz = nztot;
1245   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1246   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
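/*
   Hedged usage sketch for the routine above: only locally owned rows may be
   requested, and each MatGetRow() must be paired with MatRestoreRow().
   rstart, rend, ncols, cols and vals are hypothetical names.

     int          row,ncols,*cols;
     PetscScalar  *vals;
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       use the ncols global column indices and values here
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/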
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1252 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1253 {
1254   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1255 
1256   PetscFunctionBegin;
1257   if (aij->getrowactive == PETSC_FALSE) {
1258     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1259   }
1260   aij->getrowactive = PETSC_FALSE;
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatNorm_MPIAIJ"
1266 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1267 {
1268   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1269   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1270   int          ierr,i,j,cstart = aij->cstart;
1271   PetscReal    sum = 0.0;
1272   PetscScalar  *v;
1273 
1274   PetscFunctionBegin;
1275   if (aij->size == 1) {
1276     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1277   } else {
1278     if (type == NORM_FROBENIUS) {
1279       v = amat->a;
1280       for (i=0; i<amat->nz; i++) {
1281 #if defined(PETSC_USE_COMPLEX)
1282         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1283 #else
1284         sum += (*v)*(*v); v++;
1285 #endif
1286       }
1287       v = bmat->a;
1288       for (i=0; i<bmat->nz; i++) {
1289 #if defined(PETSC_USE_COMPLEX)
1290         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1291 #else
1292         sum += (*v)*(*v); v++;
1293 #endif
1294       }
1295       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1296       *norm = sqrt(*norm);
1297     } else if (type == NORM_1) { /* max column norm */
1298       PetscReal *tmp,*tmp2;
1299       int    *jj,*garray = aij->garray;
1300       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1301       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1302       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1303       *norm = 0.0;
1304       v = amat->a; jj = amat->j;
1305       for (j=0; j<amat->nz; j++) {
1306         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1307       }
1308       v = bmat->a; jj = bmat->j;
1309       for (j=0; j<bmat->nz; j++) {
1310         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1311       }
1312       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1313       for (j=0; j<mat->N; j++) {
1314         if (tmp2[j] > *norm) *norm = tmp2[j];
1315       }
1316       ierr = PetscFree(tmp);CHKERRQ(ierr);
1317       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1318     } else if (type == NORM_INFINITY) { /* max row norm */
1319       PetscReal ntemp = 0.0;
1320       for (j=0; j<aij->A->m; j++) {
1321         v = amat->a + amat->i[j];
1322         sum = 0.0;
1323         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1324           sum += PetscAbsScalar(*v); v++;
1325         }
1326         v = bmat->a + bmat->i[j];
1327         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1328           sum += PetscAbsScalar(*v); v++;
1329         }
1330         if (sum > ntemp) ntemp = sum;
1331       }
1332       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1333     } else {
1334       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1335     }
1336   }
1337   PetscFunctionReturn(0);
1338 }
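/*
   For reference (a sketch of the reductions above), with a_ij the global entries:

     NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )   local sums over A and B, MPI_SUM, then sqrt
     NORM_1:          max_j sum_i |a_ij|        per-column sums, MPI_SUM, then the max
     NORM_INFINITY:   max_i sum_j |a_ij|        per-row sums, then MPI_MAX
*/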
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatTranspose_MPIAIJ"
1342 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1343 {
1344   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1345   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1346   int          ierr;
1347   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1348   Mat          B;
1349   PetscScalar  *array;
1350 
1351   PetscFunctionBegin;
1352   if (!matout && M != N) {
1353     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1354   }
1355 
1356   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1357 
1358   /* copy over the A part */
1359   Aloc = (Mat_SeqAIJ*)a->A->data;
1360   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1361   row = a->rstart;
1362   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1363   for (i=0; i<m; i++) {
1364     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1365     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1366   }
1367   aj = Aloc->j;
1368   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1369 
1370   /* copy over the B part */
1371   Aloc = (Mat_SeqAIJ*)a->B->data;
1372   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1373   row  = a->rstart;
1374   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1375   ct   = cols;
1376   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1377   for (i=0; i<m; i++) {
1378     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1379     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1380   }
1381   ierr = PetscFree(ct);CHKERRQ(ierr);
1382   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1383   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1384   if (matout) {
1385     *matout = B;
1386   } else {
1387     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1394 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1395 {
1396   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1397   Mat        a = aij->A,b = aij->B;
1398   int        ierr,s1,s2,s3;
1399 
1400   PetscFunctionBegin;
1401   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1402   if (rr) {
1403     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1404     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1405     /* Overlap communication with computation. */
1406     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1407   }
1408   if (ll) {
1409     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1410     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1411     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1412   }
  /* scale the diagonal block */
1414   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1415 
1416   if (rr) {
1417     /* Do a scatter end and then right scale the off-diagonal block */
1418     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1419     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1420   }
1421 
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1428 int MatPrintHelp_MPIAIJ(Mat A)
1429 {
1430   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1431   int        ierr;
1432 
1433   PetscFunctionBegin;
1434   if (!a->rank) {
1435     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1436   }
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1442 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1443 {
1444   PetscFunctionBegin;
1445   *bs = 1;
1446   PetscFunctionReturn(0);
1447 }
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1450 int MatSetUnfactored_MPIAIJ(Mat A)
1451 {
1452   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1453   int        ierr;
1454 
1455   PetscFunctionBegin;
1456   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatEqual_MPIAIJ"
1462 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1463 {
1464   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1465   Mat        a,b,c,d;
1466   PetscTruth flg;
1467   int        ierr;
1468 
1469   PetscFunctionBegin;
1470   a = matA->A; b = matA->B;
1471   c = matB->A; d = matB->B;
1472 
1473   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1474   if (flg == PETSC_TRUE) {
1475     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1476   }
1477   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatCopy_MPIAIJ"
1483 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1484 {
1485   int        ierr;
1486   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1487   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1488 
1489   PetscFunctionBegin;
1490   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1491   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* Because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different; hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
1497     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1498   } else {
1499     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1500     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1501   }
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1507 int MatSetUpPreallocation_MPIAIJ(Mat A)
1508 {
1509   int        ierr;
1510 
1511   PetscFunctionBegin;
1512   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #include "petscblaslapack.h"
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatAXPY_MPIAIJ"
1519 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1520 {
1521   int        ierr,one=1,i;
1522   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1523   Mat_SeqAIJ *x,*y;
1524 
1525   PetscFunctionBegin;
1526   if (str == SAME_NONZERO_PATTERN) {
1527     x = (Mat_SeqAIJ *)xx->A->data;
1528     y = (Mat_SeqAIJ *)yy->A->data;
1529     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1530     x = (Mat_SeqAIJ *)xx->B->data;
1531     y = (Mat_SeqAIJ *)yy->B->data;
1532     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1533   } else if (str == SUBSET_NONZERO_PATTERN) {
1534     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1535 
1536     x = (Mat_SeqAIJ *)xx->B->data;
1537     y = (Mat_SeqAIJ *)yy->B->data;
1538     if (y->xtoy && y->XtoY != xx->B) {
1539       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1540       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1541     }
1542     if (!y->xtoy) { /* get xtoy */
1543       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1544       y->XtoY = xx->B;
1545     }
1546     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1547   } else {
1548     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1549   }
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 /* -------------------------------------------------------------------*/
1554 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1555        MatGetRow_MPIAIJ,
1556        MatRestoreRow_MPIAIJ,
1557        MatMult_MPIAIJ,
1558 /* 4*/ MatMultAdd_MPIAIJ,
1559        MatMultTranspose_MPIAIJ,
1560        MatMultTransposeAdd_MPIAIJ,
1561        0,
1562        0,
1563        0,
1564 /*10*/ 0,
1565        0,
1566        0,
1567        MatRelax_MPIAIJ,
1568        MatTranspose_MPIAIJ,
1569 /*15*/ MatGetInfo_MPIAIJ,
1570        MatEqual_MPIAIJ,
1571        MatGetDiagonal_MPIAIJ,
1572        MatDiagonalScale_MPIAIJ,
1573        MatNorm_MPIAIJ,
1574 /*20*/ MatAssemblyBegin_MPIAIJ,
1575        MatAssemblyEnd_MPIAIJ,
1576        0,
1577        MatSetOption_MPIAIJ,
1578        MatZeroEntries_MPIAIJ,
1579 /*25*/ MatZeroRows_MPIAIJ,
1580 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1581        MatLUFactorSymbolic_MPIAIJ_TFS,
1582 #else
1583        0,
1584 #endif
1585        0,
1586        0,
1587        0,
1588 /*30*/ MatSetUpPreallocation_MPIAIJ,
1589        0,
1590        0,
1591        0,
1592        0,
1593 /*35*/ MatDuplicate_MPIAIJ,
1594        0,
1595        0,
1596        0,
1597        0,
1598 /*40*/ MatAXPY_MPIAIJ,
1599        MatGetSubMatrices_MPIAIJ,
1600        MatIncreaseOverlap_MPIAIJ,
1601        MatGetValues_MPIAIJ,
1602        MatCopy_MPIAIJ,
1603 /*45*/ MatPrintHelp_MPIAIJ,
1604        MatScale_MPIAIJ,
1605        0,
1606        0,
1607        0,
1608 /*50*/ MatGetBlockSize_MPIAIJ,
1609        0,
1610        0,
1611        0,
1612        0,
1613 /*55*/ MatFDColoringCreate_MPIAIJ,
1614        0,
1615        MatSetUnfactored_MPIAIJ,
1616        0,
1617        0,
1618 /*60*/ MatGetSubMatrix_MPIAIJ,
1619        MatDestroy_MPIAIJ,
1620        MatView_MPIAIJ,
1621        MatGetPetscMaps_Petsc,
1622        0,
1623 /*65*/ 0,
1624        0,
1625        0,
1626        0,
1627        0,
1628 /*70*/ 0,
1629        0,
1630        MatSetColoring_MPIAIJ,
1631        MatSetValuesAdic_MPIAIJ,
1632        MatSetValuesAdifor_MPIAIJ,
1633 /*75*/ 0,
1634        0,
1635        0,
1636        0,
1637        0,
1638 /*80*/ 0,
1639        0,
1640        0,
1641        0,
1642        0,
1643 /*85*/ MatLoad_MPIAIJ};
1644 
1645 /* ----------------------------------------------------------------------------------------*/
1646 
1647 EXTERN_C_BEGIN
1648 #undef __FUNCT__
1649 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1650 int MatStoreValues_MPIAIJ(Mat mat)
1651 {
1652   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1653   int        ierr;
1654 
1655   PetscFunctionBegin;
1656   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1657   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1658   PetscFunctionReturn(0);
1659 }
1660 EXTERN_C_END
1661 
1662 EXTERN_C_BEGIN
1663 #undef __FUNCT__
1664 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1665 int MatRetrieveValues_MPIAIJ(Mat mat)
1666 {
1667   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1668   int        ierr;
1669 
1670   PetscFunctionBegin;
1671   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1672   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 EXTERN_C_END
1676 
1677 #include "petscpc.h"
1678 EXTERN_C_BEGIN
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1681 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1682 {
1683   Mat_MPIAIJ   *b;
1684   int          ierr,i;
1685 
1686   PetscFunctionBegin;
1687   B->preallocated = PETSC_TRUE;
1688   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1689   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1690   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1691   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1692   if (d_nnz) {
1693     for (i=0; i<B->m; i++) {
1694       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1695     }
1696   }
1697   if (o_nnz) {
1698     for (i=0; i<B->m; i++) {
1699       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1700     }
1701   }
1702   b = (Mat_MPIAIJ*)B->data;
1703 
1704   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1705   PetscLogObjectParent(B,b->A);
1706   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1707   PetscLogObjectParent(B,b->B);
1708 
1709   PetscFunctionReturn(0);
1710 }
1711 EXTERN_C_END
1712 
1713 /*MC
1714    MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1715 
1716    Options Database Keys:
1717 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1718 
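  Example usage (a minimal sketch, not taken from a PETSc example; comm, m, n, M, N
  are placeholders for the caller's communicator and sizes, and error checking is omitted):
.vb
     Mat A;
     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetFromOptions(A);   /* -mat_type mpiaij can still select the type here */
.ve
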
1719   Level: beginner
1720 
1721 .seealso: MatCreateMPIAIJ
1722 M*/
1723 
1724 EXTERN_C_BEGIN
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatCreate_MPIAIJ"
1727 int MatCreate_MPIAIJ(Mat B)
1728 {
1729   Mat_MPIAIJ *b;
1730   int        ierr,i,size;
1731 
1732   PetscFunctionBegin;
1733   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1734 
1735   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1736   B->data         = (void*)b;
1737   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1738   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1739   B->factor       = 0;
1740   B->assembled    = PETSC_FALSE;
1741   B->mapping      = 0;
1742 
1743   B->insertmode      = NOT_SET_VALUES;
1744   b->size            = size;
1745   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1746 
1747   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1748   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1749 
  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
1752   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1753   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1754 
1755   /* build local table of row and column ownerships */
1756   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1757   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1758   b->cowners = b->rowners + b->size + 2;
1759   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1760   b->rowners[0] = 0;
1761   for (i=2; i<=b->size; i++) {
1762     b->rowners[i] += b->rowners[i-1];
1763   }
1764   b->rstart = b->rowners[b->rank];
1765   b->rend   = b->rowners[b->rank+1];
1766   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1767   b->cowners[0] = 0;
1768   for (i=2; i<=b->size; i++) {
1769     b->cowners[i] += b->cowners[i-1];
1770   }
1771   b->cstart = b->cowners[b->rank];
1772   b->cend   = b->cowners[b->rank+1];
1773 
1774   /* build cache for off array entries formed */
1775   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1776   b->donotstash  = PETSC_FALSE;
1777   b->colmap      = 0;
1778   b->garray      = 0;
1779   b->roworiented = PETSC_TRUE;
1780 
1781   /* stuff used for matrix vector multiply */
1782   b->lvec      = PETSC_NULL;
1783   b->Mvctx     = PETSC_NULL;
1784 
1785   /* stuff for MatGetRow() */
1786   b->rowindices   = 0;
1787   b->rowvalues    = 0;
1788   b->getrowactive = PETSC_FALSE;
1789 
1790   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1791                                      "MatStoreValues_MPIAIJ",
1792                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1793   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1794                                      "MatRetrieveValues_MPIAIJ",
1795                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1796   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1797 				     "MatGetDiagonalBlock_MPIAIJ",
1798                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1799   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1800 				     "MatIsSymmetric_MPIAIJ",
1801 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1802   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1803 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1804 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1805   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1806 				     "MatDiagonalScaleLocal_MPIAIJ",
1807 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 EXTERN_C_END
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1814 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1815 {
1816   Mat        mat;
1817   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1818   int        ierr;
1819 
1820   PetscFunctionBegin;
1821   *newmat       = 0;
1822   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1823   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1824   a    = (Mat_MPIAIJ*)mat->data;
1825   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1826   mat->factor       = matin->factor;
1827   mat->assembled    = PETSC_TRUE;
1828   mat->insertmode   = NOT_SET_VALUES;
1829   mat->preallocated = PETSC_TRUE;
1830 
1831   a->rstart       = oldmat->rstart;
1832   a->rend         = oldmat->rend;
1833   a->cstart       = oldmat->cstart;
1834   a->cend         = oldmat->cend;
1835   a->size         = oldmat->size;
1836   a->rank         = oldmat->rank;
1837   a->donotstash   = oldmat->donotstash;
1838   a->roworiented  = oldmat->roworiented;
1839   a->rowindices   = 0;
1840   a->rowvalues    = 0;
1841   a->getrowactive = PETSC_FALSE;
1842 
1843   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1844   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1845   if (oldmat->colmap) {
1846 #if defined (PETSC_USE_CTABLE)
1847     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1848 #else
1849     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1850     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1851     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1852 #endif
1853   } else a->colmap = 0;
1854   if (oldmat->garray) {
1855     int len;
1856     len  = oldmat->B->n;
1857     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1858     PetscLogObjectMemory(mat,len*sizeof(int));
1859     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1860   } else a->garray = 0;
1861 
1862   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1863   PetscLogObjectParent(mat,a->lvec);
1864   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1865   PetscLogObjectParent(mat,a->Mvctx);
1866   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1867   PetscLogObjectParent(mat,a->A);
1868   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1869   PetscLogObjectParent(mat,a->B);
1870   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1871   *newmat = mat;
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #include "petscsys.h"
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatLoad_MPIAIJ"
1879 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1880 {
1881   Mat          A;
1882   PetscScalar  *vals,*svals;
1883   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1884   MPI_Status   status;
1885   int          i,nz,ierr,j,rstart,rend,fd;
1886   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1887   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1888   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1889 
1890   PetscFunctionBegin;
1891   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1892   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1893   if (!rank) {
1894     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1895     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1896     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1897     if (header[3] < 0) {
1898       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1899     }
1900   }
1901 
1902   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1903   M = header[1]; N = header[2];
1904   /* determine ownership of all rows */
1905   m = M/size + ((M % size) > rank);
1906   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1907   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1908   rowners[0] = 0;
1909   for (i=2; i<=size; i++) {
1910     rowners[i] += rowners[i-1];
1911   }
1912   rstart = rowners[rank];
1913   rend   = rowners[rank+1];
1914 
1915   /* distribute row lengths to all processors */
1916   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1917   offlens = ourlens + (rend-rstart);
1918   if (!rank) {
1919     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1920     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1921     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1922     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1923     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1924     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1925   } else {
1926     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1927   }
1928 
1929   if (!rank) {
1930     /* calculate the number of nonzeros on each processor */
1931     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1932     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1933     for (i=0; i<size; i++) {
1934       for (j=rowners[i]; j< rowners[i+1]; j++) {
1935         procsnz[i] += rowlengths[j];
1936       }
1937     }
1938     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1939 
1940     /* determine max buffer needed and allocate it */
1941     maxnz = 0;
1942     for (i=0; i<size; i++) {
1943       maxnz = PetscMax(maxnz,procsnz[i]);
1944     }
1945     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1946 
1947     /* read in my part of the matrix column indices  */
1948     nz   = procsnz[0];
1949     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1950     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1951 
    /* read in everyone else's part and ship it off */
1953     for (i=1; i<size; i++) {
1954       nz   = procsnz[i];
1955       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1956       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1957     }
1958     ierr = PetscFree(cols);CHKERRQ(ierr);
1959   } else {
1960     /* determine buffer space needed for message */
1961     nz = 0;
1962     for (i=0; i<m; i++) {
1963       nz += ourlens[i];
1964     }
1965     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1966 
    /* receive message of column indices */
1968     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1969     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1971   }
1972 
1973   /* determine column ownership if matrix is not square */
1974   if (N != M) {
1975     n      = N/size + ((N % size) > rank);
1976     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1977     cstart = cend - n;
1978   } else {
1979     cstart = rstart;
1980     cend   = rend;
1981     n      = cend - cstart;
1982   }
1983 
1984   /* loop over local rows, determining number of off diagonal entries */
1985   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1986   jj = 0;
1987   for (i=0; i<m; i++) {
1988     for (j=0; j<ourlens[i]; j++) {
1989       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1990       jj++;
1991     }
1992   }
1993 
1994   /* create our matrix */
1995   for (i=0; i<m; i++) {
1996     ourlens[i] -= offlens[i];
1997   }
1998   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
1999   ierr = MatSetType(A,type);CHKERRQ(ierr);
2000   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2001 
2002   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2003   for (i=0; i<m; i++) {
2004     ourlens[i] += offlens[i];
2005   }
2006 
2007   if (!rank) {
2008     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2009 
2010     /* read in my part of the matrix numerical values  */
2011     nz   = procsnz[0];
2012     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2013 
2014     /* insert into matrix */
2015     jj      = rstart;
2016     smycols = mycols;
2017     svals   = vals;
2018     for (i=0; i<m; i++) {
2019       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2020       smycols += ourlens[i];
2021       svals   += ourlens[i];
2022       jj++;
2023     }
2024 
    /* read in the other processors' parts and ship them out */
2026     for (i=1; i<size; i++) {
2027       nz   = procsnz[i];
2028       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2029       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2030     }
2031     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2032   } else {
2033     /* receive numeric values */
2034     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2035 
    /* receive message of values */
2037     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2038     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2040 
2041     /* insert into matrix */
2042     jj      = rstart;
2043     smycols = mycols;
2044     svals   = vals;
2045     for (i=0; i<m; i++) {
2046       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2047       smycols += ourlens[i];
2048       svals   += ourlens[i];
2049       jj++;
2050     }
2051   }
2052   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2053   ierr = PetscFree(vals);CHKERRQ(ierr);
2054   ierr = PetscFree(mycols);CHKERRQ(ierr);
2055   ierr = PetscFree(rowners);CHKERRQ(ierr);
2056 
2057   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2058   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2059   *newmat = A;
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2065 /*
    Not great since it makes two copies of the submatrix: first a local SeqAIJ,
  and then the end result formed by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2069 */
2070 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2071 {
2072   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2073   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2074   Mat          *local,M,Mreuse;
2075   PetscScalar  *vwork,*aa;
2076   MPI_Comm     comm = mat->comm;
2077   Mat_SeqAIJ   *aij;
2078 
2079 
2080   PetscFunctionBegin;
2081   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2082   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2083 
2084   if (call ==  MAT_REUSE_MATRIX) {
2085     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2086     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2087     local = &Mreuse;
2088     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2089   } else {
2090     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2091     Mreuse = *local;
2092     ierr   = PetscFree(local);CHKERRQ(ierr);
2093   }
2094 
2095   /*
2096       m - number of local rows
2097       n - number of columns (same on all processors)
2098       rstart - first row in new global matrix generated
2099   */
2100   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2101   if (call == MAT_INITIAL_MATRIX) {
2102     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2103     ii  = aij->i;
2104     jj  = aij->j;
2105 
2106     /*
2107         Determine the number of non-zeros in the diagonal and off-diagonal
2108         portions of the matrix in order to do correct preallocation
2109     */
2110 
2111     /* first get start and end of "diagonal" columns */
2112     if (csize == PETSC_DECIDE) {
2113       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2114       if (mglobal == n) { /* square matrix */
2115 	nlocal = m;
2116       } else {
2117         nlocal = n/size + ((n % size) > rank);
2118       }
2119     } else {
2120       nlocal = csize;
2121     }
2122     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2123     rstart = rend - nlocal;
2124     if (rank == size - 1 && rend != n) {
2125       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2126     }
2127 
2128     /* next, compute all the lengths */
2129     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2130     olens = dlens + m;
2131     for (i=0; i<m; i++) {
2132       jend = ii[i+1] - ii[i];
2133       olen = 0;
2134       dlen = 0;
2135       for (j=0; j<jend; j++) {
2136         if (*jj < rstart || *jj >= rend) olen++;
2137         else dlen++;
2138         jj++;
2139       }
2140       olens[i] = olen;
2141       dlens[i] = dlen;
2142     }
2143     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2144     ierr = PetscFree(dlens);CHKERRQ(ierr);
2145   } else {
2146     int ml,nl;
2147 
2148     M = *newmat;
2149     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2150     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2151     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2152     /*
2153          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2154        rather than the slower MatSetValues().
2155     */
2156     M->was_assembled = PETSC_TRUE;
2157     M->assembled     = PETSC_FALSE;
2158   }
2159   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2160   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2161   ii  = aij->i;
2162   jj  = aij->j;
2163   aa  = aij->a;
2164   for (i=0; i<m; i++) {
2165     row   = rstart + i;
2166     nz    = ii[i+1] - ii[i];
2167     cwork = jj;     jj += nz;
2168     vwork = aa;     aa += nz;
2169     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2170   }
2171 
2172   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2173   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2174   *newmat = M;
2175 
2176   /* save submatrix used in processor for next request */
2177   if (call ==  MAT_INITIAL_MATRIX) {
2178     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2179     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2180   }
2181 
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2187 /*@C
2188    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2189    (the default parallel PETSc format).  For good matrix assembly performance
2190    the user should preallocate the matrix storage by setting the parameters
2191    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2192    performance can be increased by more than a factor of 50.
2193 
2194    Collective on MPI_Comm
2195 
2196    Input Parameters:
2197 +  A - the matrix
2198 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2199            (same value is used for all local rows)
2200 .  d_nnz - array containing the number of nonzeros in the various rows of the
2201            DIAGONAL portion of the local submatrix (possibly different for each row)
2202            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2203            The size of this array is equal to the number of local rows, i.e 'm'.
2204            You must leave room for the diagonal entry even if it is zero.
2205 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2206            submatrix (same value is used for all local rows).
2207 -  o_nnz - array containing the number of nonzeros in the various rows of the
2208            OFF-DIAGONAL portion of the local submatrix (possibly different for
2209            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2210            structure. The size of this array is equal to the number
2211            of local rows, i.e 'm'.
2212 
2213    The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
2215    storage.  That is, the stored row and column indices can begin at
2216    either one (as in Fortran) or zero.  See the users manual for details.
2217 
2218    The user MUST specify either the local or global matrix dimensions
2219    (possibly both).
2220 
2221    The parallel matrix is partitioned such that the first m0 rows belong to
2222    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2224 
2225    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2231 
2232    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2233 
2234    By default, this format uses inodes (identical nodes) when possible.
2235    We search for consecutive rows with the same nonzero structure, thereby
2236    reusing matrix information to achieve increased efficiency.
2237 
2238    Options Database Keys:
2239 +  -mat_aij_no_inode  - Do not use inodes
2240 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2241 -  -mat_aij_oneindex - Internally use indexing starting at 1
2242         rather than 0.  Note that when calling MatSetValues(),
2243         the user still MUST index entries starting at 0!
2244 
2245    Example usage:
2246 
   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2249    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2250    as follows:
2251 
2252 .vb
2253             1  2  0  |  0  3  0  |  0  4
2254     Proc0   0  5  6  |  7  0  0  |  8  0
2255             9  0 10  | 11  0  0  | 12  0
2256     -------------------------------------
2257            13  0 14  | 15 16 17  |  0  0
2258     Proc1   0 18  0  | 19 20 21  |  0  0
2259             0  0  0  | 22 23  0  | 24  0
2260     -------------------------------------
2261     Proc2  25 26 27  |  0  0 28  | 29  0
2262            30  0  0  | 31 32 33  |  0 34
2263 .ve
2264 
2265    This can be represented as a collection of submatrices as:
2266 
2267 .vb
2268       A B C
2269       D E F
2270       G H I
2271 .ve
2272 
2273    Where the submatrices A,B,C are owned by proc0, D,E,F are
2274    owned by proc1, G,H,I are owned by proc2.
2275 
2276    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2277    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2278    The 'M','N' parameters are 8,8, and have the same values on all procs.
2279 
2280    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2281    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2282    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2283    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2286 
2287    When d_nz, o_nz parameters are specified, d_nz storage elements are
2288    allocated for every row of the local diagonal submatrix, and o_nz
2289    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2292    In this case, the values of d_nz,o_nz are:
2293 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
2297 .ve
2298    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
2301    34 values.
2302 
2303    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2305    In the above case the values for d_nnz,o_nnz are:
2306 .vb
2307      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2308      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2309      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2310 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
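
   A call sketch for the layout above, using proc1's values (a minimal sketch,
   not taken from a PETSc example; A is assumed to already have type MPIAIJ):
.vb
     int d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve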
2313 
2314    Level: intermediate
2315 
2316 .keywords: matrix, aij, compressed row, sparse, parallel
2317 
2318 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2319 @*/
2320 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2321 {
2322   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2323 
2324   PetscFunctionBegin;
2325   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2326   if (f) {
2327     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2328   }
2329   PetscFunctionReturn(0);
2330 }
2331 
2332 #undef __FUNCT__
2333 #define __FUNCT__ "MatCreateMPIAIJ"
2334 /*@C
2335    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2336    (the default parallel PETSc format).  For good matrix assembly performance
2337    the user should preallocate the matrix storage by setting the parameters
2338    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2339    performance can be increased by more than a factor of 50.
2340 
2341    Collective on MPI_Comm
2342 
2343    Input Parameters:
2344 +  comm - MPI communicator
2345 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2346            This value should be the same as the local size used in creating the
2347            y vector for the matrix-vector product y = Ax.
2348 .  n - This value should be the same as the local size used in creating the
2349        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2350        calculated if N is given) For square matrices n is almost always m.
2351 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2352 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2353 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2354            (same value is used for all local rows)
2355 .  d_nnz - array containing the number of nonzeros in the various rows of the
2356            DIAGONAL portion of the local submatrix (possibly different for each row)
2357            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2358            The size of this array is equal to the number of local rows, i.e 'm'.
2359            You must leave room for the diagonal entry even if it is zero.
2360 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2361            submatrix (same value is used for all local rows).
2362 -  o_nnz - array containing the number of nonzeros in the various rows of the
2363            OFF-DIAGONAL portion of the local submatrix (possibly different for
2364            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2365            structure. The size of this array is equal to the number
2366            of local rows, i.e 'm'.
2367 
2368    Output Parameter:
2369 .  A - the matrix
2370 
2371    Notes:
2372    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2373    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2374    storage requirements for this matrix.
2375 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
2378    that argument.
2379 
2380    The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
2382    storage.  That is, the stored row and column indices can begin at
2383    either one (as in Fortran) or zero.  See the users manual for details.
2384 
2385    The user MUST specify either the local or global matrix dimensions
2386    (possibly both).
2387 
2388    The parallel matrix is partitioned such that the first m0 rows belong to
2389    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2391 
2392    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2398 
2399    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2400 
2401    When calling this routine with a single process communicator, a matrix of
2402    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2403    type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2405 
2406    By default, this format uses inodes (identical nodes) when possible.
2407    We search for consecutive rows with the same nonzero structure, thereby
2408    reusing matrix information to achieve increased efficiency.
2409 
2410    Options Database Keys:
2411 +  -mat_aij_no_inode  - Do not use inodes
2412 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2413 -  -mat_aij_oneindex - Internally use indexing starting at 1
2414         rather than 0.  Note that when calling MatSetValues(),
2415         the user still MUST index entries starting at 0!
2416 
2417 
2418    Example usage:
2419 
   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2422    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2423    as follows:
2424 
2425 .vb
2426             1  2  0  |  0  3  0  |  0  4
2427     Proc0   0  5  6  |  7  0  0  |  8  0
2428             9  0 10  | 11  0  0  | 12  0
2429     -------------------------------------
2430            13  0 14  | 15 16 17  |  0  0
2431     Proc1   0 18  0  | 19 20 21  |  0  0
2432             0  0  0  | 22 23  0  | 24  0
2433     -------------------------------------
2434     Proc2  25 26 27  |  0  0 28  | 29  0
2435            30  0  0  | 31 32 33  |  0 34
2436 .ve
2437 
2438    This can be represented as a collection of submatrices as:
2439 
2440 .vb
2441       A B C
2442       D E F
2443       G H I
2444 .ve
2445 
2446    Where the submatrices A,B,C are owned by proc0, D,E,F are
2447    owned by proc1, G,H,I are owned by proc2.
2448 
2449    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2450    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2451    The 'M','N' parameters are 8,8, and have the same values on all procs.
2452 
2453    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2454    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2455    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2456    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2459 
2460    When d_nz, o_nz parameters are specified, d_nz storage elements are
2461    allocated for every row of the local diagonal submatrix, and o_nz
2462    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2465    In this case, the values of d_nz,o_nz are:
2466 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
2470 .ve
2471    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
2474    34 values.
2475 
2476    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2478    In the above case the values for d_nnz,o_nnz are:
2479 .vb
2480      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2481      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2482      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2483 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
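
   A call sketch for the layout above, using proc1's values (a minimal sketch,
   not taken from a PETSc example; comm is the caller's communicator):
.vb
     Mat A;
     int d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve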
2486 
2487    Level: intermediate
2488 
2489 .keywords: matrix, aij, compressed row, sparse, parallel
2490 
2491 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2492 @*/
2493 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2494 {
2495   int ierr,size;
2496 
2497   PetscFunctionBegin;
2498   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2499   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2500   if (size > 1) {
2501     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2502     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2503   } else {
2504     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2505     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2506   }
2507   PetscFunctionReturn(0);
2508 }
2509 
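/*
    MatMPIAIJGetSeqAIJ - gives access to the internal pieces of an MPIAIJ matrix:
  the diagonal SeqAIJ block Ad, the off-diagonal SeqAIJ block Ao, and the garray
  mapping from local column numbers of Ao to global column numbers.
  A minimal usage sketch (variable names are illustrative, not from a PETSc example):

     Mat Ad,Ao;
     int *garray;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/
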
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2512 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2513 {
2514   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2515   PetscFunctionBegin;
2516   *Ad     = a->A;
2517   *Ao     = a->B;
2518   *colmap = a->garray;
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2524 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2525 {
2526   int        ierr,i;
2527   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2528 
2529   PetscFunctionBegin;
2530   if (coloring->ctype == IS_COLORING_LOCAL) {
2531     ISColoringValue *allcolors,*colors;
2532     ISColoring      ocoloring;
2533 
2534     /* set coloring for diagonal portion */
2535     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2536 
2537     /* set coloring for off-diagonal portion */
2538     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2539     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2540     for (i=0; i<a->B->n; i++) {
2541       colors[i] = allcolors[a->garray[i]];
2542     }
2543     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2544     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2545     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2546     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2547   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2548     ISColoringValue *colors;
2549     int             *larray;
2550     ISColoring      ocoloring;
2551 
2552     /* set coloring for diagonal portion */
2553     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2554     for (i=0; i<a->A->n; i++) {
2555       larray[i] = i + a->cstart;
2556     }
2557     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2558     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2559     for (i=0; i<a->A->n; i++) {
2560       colors[i] = coloring->colors[larray[i]];
2561     }
2562     ierr = PetscFree(larray);CHKERRQ(ierr);
2563     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2564     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2565     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2566 
2567     /* set coloring for off-diagonal portion */
2568     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2569     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2570     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2571     for (i=0; i<a->B->n; i++) {
2572       colors[i] = coloring->colors[larray[i]];
2573     }
2574     ierr = PetscFree(larray);CHKERRQ(ierr);
2575     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2576     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2577     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2578   } else {
2579     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2580   }
2581 
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 #undef __FUNCT__
2586 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2587 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2588 {
2589   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2590   int        ierr;
2591 
2592   PetscFunctionBegin;
2593   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2594   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 #undef __FUNCT__
2599 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2600 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2601 {
2602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2603   int        ierr;
2604 
2605   PetscFunctionBegin;
2606   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2607   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatMerge"
2613 /*@C
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor
2616 
2617     Collective on MPI_Comm
2618 
2619    Input Parameters:
+    comm - the communicator the parallel matrix will live on
-    inmat - the input sequential matrix on each process
2622 
2623    Output Parameter:
2624 .    outmat - the parallel matrix generated
2625 
2626     Level: advanced
2627 
   Notes: The number of columns of the input matrices MUST be the same on
      every process. The input matrix inmat is destroyed by this routine.
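
   Example usage (a minimal sketch, not taken from a PETSc example; seqmat is a
   hypothetical SeqAIJ matrix assembled independently on each process):
.vb
     Mat merged;
     ierr = MatMerge(PETSC_COMM_WORLD,seqmat,&merged);CHKERRQ(ierr);
.ve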
2630 
2631 @*/
2632 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2633 {
2634   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2635   PetscScalar *values;
2636   PetscMap    columnmap,rowmap;
2637 
2638   PetscFunctionBegin;
2639 
2640   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2641 
2642   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2643   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2644   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2645   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2646   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2647   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2648 
2649   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2650   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2651   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2652   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2653   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2654 
2655   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2656   for (i=0;i<m;i++) {
2657     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2658     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2659     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2660   }
2661   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2662   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2663 
2664   for (i=0;i<m;i++) {
2665     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2666     I    = i + rstart;
2667     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2668     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2669   }
2670   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2671   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2672   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2673 
2674   PetscFunctionReturn(0);
2675 }
2676 
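/*
    MatFileSplit - writes each process's local rows of A as a sequential matrix
  to its own binary file, named <outfile>.<rank>.
  A minimal usage sketch (the output file name is illustrative):

     ierr = MatFileSplit(A,"splitmat");CHKERRQ(ierr);  /* writes splitmat.0, splitmat.1, ... */
*/
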
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatFileSplit"
2679 int MatFileSplit(Mat A,char *outfile)
2680 {
2681   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2682   PetscViewer out;
2683   char        *name;
2684   Mat         B;
2685   PetscScalar *values;
2686 
2687   PetscFunctionBegin;
2688 
2689   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2690   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2691   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2692   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2693   for (i=0;i<m;i++) {
2694     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2695     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2696     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2697   }
2698   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2699   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2700 
2701   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2702   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2703   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2704   sprintf(name,"%s.%d",outfile,rank);
2705   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
2707   ierr = MatView(B,out);CHKERRQ(ierr);
2708   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2709   ierr = MatDestroy(B);CHKERRQ(ierr);
2710   PetscFunctionReturn(0);
2711 }
2712