xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 973046187a5e16e2b37c23287137a5e6ee1ae012)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 /*
8   Local utility routine that creates a mapping from the global column
9 number to the local number in the off-diagonal part of the local
10 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
11 a slightly higher hash table cost; without it it is not scalable (each processor
12 has an order N integer array but is fast to acess.
13 */
14 #undef __FUNCT__
15 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
19   int        n = aij->B->n,i,ierr;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
29   PetscLogObjectMemory(mat,mat->N*sizeof(int));
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 #define CHUNKSIZE   15
37 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
38 { \
39  \
40     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
41     rmax = aimax[row]; nrow = ailen[row];  \
42     col1 = col - shift; \
43      \
44     low = 0; high = nrow; \
45     while (high-low > 5) { \
46       t = (low+high)/2; \
47       if (rp[t] > col) high = t; \
48       else             low  = t; \
49     } \
50       for (_i=low; _i<high; _i++) { \
51         if (rp[_i] > col1) break; \
52         if (rp[_i] == col1) { \
53           if (addv == ADD_VALUES) ap[_i] += value;   \
54           else                  ap[_i] = value; \
55           goto a_noinsert; \
56         } \
57       }  \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (int*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(int));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                  ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (nonew == 1) goto b_noinsert; \
133       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
134       if (nrow >= rmax) { \
135         /* there is no extra room in row, therefore enlarge */ \
136         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
137         PetscScalar *new_a; \
138  \
139         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
140  \
141         /* malloc new storage space */ \
142         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
143         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
144         new_j   = (int*)(new_a + new_nz); \
145         new_i   = new_j + new_nz; \
146  \
147         /* copy over old data into new slots */ \
148         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
149         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
150         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
151         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
152         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
153                                                            len*sizeof(int));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
156                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
157         /* free up old matrix storage */ \
158  \
159         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
160         if (!b->singlemalloc) { \
161           ierr = PetscFree(b->i);CHKERRQ(ierr); \
162           ierr = PetscFree(b->j);CHKERRQ(ierr); \
163         } \
164         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
165         b->singlemalloc = PETSC_TRUE; \
166  \
167         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
168         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
169         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
170         b->maxnz += CHUNKSIZE; \
171         b->reallocs++; \
172       } \
173       N = nrow++ - 1; b->nz++; \
174       /* shift up all the later entries in this row */ \
175       for (ii=N; ii>=_i; ii--) { \
176         rp[ii+1] = rp[ii]; \
177         ap[ii+1] = ap[ii]; \
178       } \
179       rp[_i] = col1;  \
180       ap[_i] = value;  \
181       b_noinsert: ; \
182       bilen[row] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIAIJ"
187 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
190   PetscScalar  value;
191   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some Variables required in the macro */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
277   int        cstart = aij->cstart,cend = aij->cend,row,col;
278 
279   PetscFunctionBegin;
280   for (i=0; i<m; i++) {
281     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
282     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
283     if (idxm[i] >= rstart && idxm[i] < rend) {
284       row = idxm[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
287         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
288         if (idxn[j] >= cstart && idxn[j] < cend){
289           col = idxn[j] - cstart;
290           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
291         } else {
292           if (!aij->colmap) {
293             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
294           }
295 #if defined (PETSC_USE_CTABLE)
296           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
297           col --;
298 #else
299           col = aij->colmap[idxn[j]] - 1;
300 #endif
301           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
302           else {
303             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
304           }
305         }
306       }
307     } else {
308       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
309     }
310   }
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
316 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
317 {
318   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
319   int         ierr,nstash,reallocs;
320   InsertMode  addv;
321 
322   PetscFunctionBegin;
323   if (aij->donotstash) {
324     PetscFunctionReturn(0);
325   }
326 
327   /* make sure all processors are either in INSERTMODE or ADDMODE */
328   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
329   if (addv == (ADD_VALUES|INSERT_VALUES)) {
330     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
331   }
332   mat->insertmode = addv; /* in case this processor had no cache */
333 
334   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
335   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
336   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
337   PetscFunctionReturn(0);
338 }
339 
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
343 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
344 {
345   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
346   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
347   int         i,j,rstart,ncols,n,ierr,flg;
348   int         *row,*col,other_disassembled;
349   PetscScalar *val;
350   InsertMode  addv = mat->insertmode;
351 
352   PetscFunctionBegin;
353   if (!aij->donotstash) {
354     while (1) {
355       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
356       if (!flg) break;
357 
358       for (i=0; i<n;) {
359         /* Now identify the consecutive vals belonging to the same row */
360         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
361         if (j < n) ncols = j-i;
362         else       ncols = n-i;
363         /* Now assemble all these values with a single function call */
364         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
365         i = j;
366       }
367     }
368     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
369   }
370 
371   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
372   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
373 
374   /* determine if any processor has disassembled, if so we must
375      also disassemble ourselfs, in order that we may reassemble. */
376   /*
377      if nonzero structure of submatrix B cannot change then we know that
378      no processor disassembled thus we can skip this stuff
379   */
380   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
381     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
382     if (mat->was_assembled && !other_disassembled) {
383       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
384     }
385   }
386 
387   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
388     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
389   }
390   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
391   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
392 
393   if (aij->rowvalues) {
394     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
395     aij->rowvalues = 0;
396   }
397 
398   /* used by MatAXPY() */
399   a->xtoy = 0; b->xtoy = 0;
400   a->XtoY = 0; b->XtoY = 0;
401 
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
407 int MatZeroEntries_MPIAIJ(Mat A)
408 {
409   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
410   int        ierr;
411 
412   PetscFunctionBegin;
413   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
414   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroRows_MPIAIJ"
420 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
421 {
422   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
423   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
424   int            *nprocs,j,idx,nsends,row;
425   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
426   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
427   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
428   MPI_Comm       comm = A->comm;
429   MPI_Request    *send_waits,*recv_waits;
430   MPI_Status     recv_status,*send_status;
431   IS             istmp;
432   PetscTruth     found;
433 
434   PetscFunctionBegin;
435   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
436   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
437 
438   /*  first count number of contributors to each processor */
439   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
440   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
441   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
442   for (i=0; i<N; i++) {
443     idx = rows[i];
444     found = PETSC_FALSE;
445     for (j=0; j<size; j++) {
446       if (idx >= owners[j] && idx < owners[j+1]) {
447         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
448       }
449     }
450     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
451   }
452   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
453 
454   /* inform other processors of number of messages and max length*/
455   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
456 
457   /* post receives:   */
458   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
459   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
460   for (i=0; i<nrecvs; i++) {
461     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
462   }
463 
464   /* do sends:
465       1) starts[i] gives the starting index in svalues for stuff going to
466          the ith processor
467   */
468   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
469   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
470   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
471   starts[0] = 0;
472   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
473   for (i=0; i<N; i++) {
474     svalues[starts[owner[i]]++] = rows[i];
475   }
476   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
477 
478   starts[0] = 0;
479   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
480   count = 0;
481   for (i=0; i<size; i++) {
482     if (nprocs[2*i+1]) {
483       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
484     }
485   }
486   ierr = PetscFree(starts);CHKERRQ(ierr);
487 
488   base = owners[rank];
489 
490   /*  wait on receives */
491   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
492   source = lens + nrecvs;
493   count  = nrecvs; slen = 0;
494   while (count) {
495     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
496     /* unpack receives into our local space */
497     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
498     source[imdex]  = recv_status.MPI_SOURCE;
499     lens[imdex]    = n;
500     slen          += n;
501     count--;
502   }
503   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
504 
505   /* move the data into the send scatter */
506   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
507   count = 0;
508   for (i=0; i<nrecvs; i++) {
509     values = rvalues + i*nmax;
510     for (j=0; j<lens[i]; j++) {
511       lrows[count++] = values[j] - base;
512     }
513   }
514   ierr = PetscFree(rvalues);CHKERRQ(ierr);
515   ierr = PetscFree(lens);CHKERRQ(ierr);
516   ierr = PetscFree(owner);CHKERRQ(ierr);
517   ierr = PetscFree(nprocs);CHKERRQ(ierr);
518 
519   /* actually zap the local rows */
520   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
521   PetscLogObjectParent(A,istmp);
522 
523   /*
524         Zero the required rows. If the "diagonal block" of the matrix
525      is square and the user wishes to set the diagonal we use seperate
526      code so that MatSetValues() is not called for each diagonal allocating
527      new memory, thus calling lots of mallocs and slowing things down.
528 
529        Contributed by: Mathew Knepley
530   */
531   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
532   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
533   if (diag && (l->A->M == l->A->N)) {
534     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
535   } else if (diag) {
536     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
537     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
538       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
539 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
540     }
541     for (i = 0; i < slen; i++) {
542       row  = lrows[i] + rstart;
543       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
544     }
545     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547   } else {
548     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
549   }
550   ierr = ISDestroy(istmp);CHKERRQ(ierr);
551   ierr = PetscFree(lrows);CHKERRQ(ierr);
552 
553   /* wait on sends */
554   if (nsends) {
555     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
556     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
557     ierr = PetscFree(send_status);CHKERRQ(ierr);
558   }
559   ierr = PetscFree(send_waits);CHKERRQ(ierr);
560   ierr = PetscFree(svalues);CHKERRQ(ierr);
561 
562   PetscFunctionReturn(0);
563 }
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatMult_MPIAIJ"
567 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
568 {
569   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
570   int        ierr,nt;
571 
572   PetscFunctionBegin;
573   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
574   if (nt != A->n) {
575     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
576   }
577   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
578   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
579   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
580   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
581   PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatMultAdd_MPIAIJ"
586 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
587 {
588   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
589   int        ierr;
590 
591   PetscFunctionBegin;
592   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
594   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
595   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
601 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
602 {
603   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
604   int        ierr;
605 
606   PetscFunctionBegin;
607   /* do nondiagonal part */
608   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
609   /* send it on its way */
610   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
611   /* do local part */
612   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
613   /* receive remote parts: note this assumes the values are not actually */
614   /* inserted in yy until the next line, which is true for my implementation*/
615   /* but is not perhaps always true. */
616   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 EXTERN_C_BEGIN
621 #undef __FUNCT__
622 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
623 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
624 {
625   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
626   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
627   IS         Me,Notme;
628   int        M,N,first,last,*notme,i, ierr;
629 
630   PetscFunctionBegin;
631 
632   /* Easy test: symmetric diagonal block */
633   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
634   ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
635   if (!*f) PetscFunctionReturn(0);
636 
637   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
638   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
639   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
640   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
641   for (i=0; i<first; i++) notme[i] = i;
642   for (i=last; i<M; i++) notme[i-last+first] = i;
643   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
644   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
645   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
646   Aoff = Aoffs[0];
647   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
648   Boff = Boffs[0];
649   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
650   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
651   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
652   ierr = ISDestroy(Me); CHKERRQ(ierr);
653   ierr = ISDestroy(Notme); CHKERRQ(ierr);
654 
655   PetscFunctionReturn(0);
656 }
657 EXTERN_C_END
658 
659 #undef __FUNCT__
660 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
661 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
662 {
663   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
664   int        ierr;
665 
666   PetscFunctionBegin;
667   /* do nondiagonal part */
668   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
669   /* send it on its way */
670   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
671   /* do local part */
672   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
673   /* receive remote parts: note this assumes the values are not actually */
674   /* inserted in yy until the next line, which is true for my implementation*/
675   /* but is not perhaps always true. */
676   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 /*
681   This only works correctly for square matrices where the subblock A->A is the
682    diagonal block
683 */
684 #undef __FUNCT__
685 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
686 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
687 {
688   int        ierr;
689   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
690 
691   PetscFunctionBegin;
692   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
693   if (a->rstart != a->cstart || a->rend != a->cend) {
694     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
695   }
696   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
697   PetscFunctionReturn(0);
698 }
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatScale_MPIAIJ"
702 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
703 {
704   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
705   int        ierr;
706 
707   PetscFunctionBegin;
708   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
709   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "MatDestroy_MPIAIJ"
715 int MatDestroy_MPIAIJ(Mat mat)
716 {
717   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
718   int        ierr;
719 
720   PetscFunctionBegin;
721 #if defined(PETSC_USE_LOG)
722   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
723 #endif
724   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
725   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
726   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
727   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
728 #if defined (PETSC_USE_CTABLE)
729   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
730 #else
731   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
732 #endif
733   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
734   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
735   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
736   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
737   ierr = PetscFree(aij);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPIAIJ_Binary"
743 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
744 {
745   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
746   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
747   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
748   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
749   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
750   PetscScalar       *column_values;
751 
752   PetscFunctionBegin;
753   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
754   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
755   nz   = A->nz + B->nz;
756   if (rank == 0) {
757     header[0] = MAT_FILE_COOKIE;
758     header[1] = mat->M;
759     header[2] = mat->N;
760     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
761     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
762     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
763     /* get largest number of rows any processor has */
764     rlen = mat->m;
765     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
766     for (i=1; i<size; i++) {
767       rlen = PetscMax(rlen,range[i+1] - range[i]);
768     }
769   } else {
770     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
771     rlen = mat->m;
772   }
773 
774   /* load up the local row counts */
775   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
776   for (i=0; i<mat->m; i++) {
777     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
778   }
779 
780   /* store the row lengths to the file */
781   if (rank == 0) {
782     MPI_Status status;
783     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
784     for (i=1; i<size; i++) {
785       rlen = range[i+1] - range[i];
786       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
787       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
788     }
789   } else {
790     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
791   }
792   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
793 
794   /* load up the local column indices */
795   nzmax = nz; /* )th processor needs space a largest processor needs */
796   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
797   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
798   cnt  = 0;
799   for (i=0; i<mat->m; i++) {
800     for (j=B->i[i]; j<B->i[i+1]; j++) {
801       if ( (col = garray[B->j[j]]) > cstart) break;
802       column_indices[cnt++] = col;
803     }
804     for (k=A->i[i]; k<A->i[i+1]; k++) {
805       column_indices[cnt++] = A->j[k] + cstart;
806     }
807     for (; j<B->i[i+1]; j++) {
808       column_indices[cnt++] = garray[B->j[j]];
809     }
810   }
811   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
812 
813   /* store the column indices to the file */
814   if (rank == 0) {
815     MPI_Status status;
816     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
817     for (i=1; i<size; i++) {
818       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
819       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
820       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
822     }
823   } else {
824     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
825     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
826   }
827   ierr = PetscFree(column_indices);CHKERRQ(ierr);
828 
829   /* load up the local column values */
830   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
831   cnt  = 0;
832   for (i=0; i<mat->m; i++) {
833     for (j=B->i[i]; j<B->i[i+1]; j++) {
834       if ( garray[B->j[j]] > cstart) break;
835       column_values[cnt++] = B->a[j];
836     }
837     for (k=A->i[i]; k<A->i[i+1]; k++) {
838       column_values[cnt++] = A->a[k];
839     }
840     for (; j<B->i[i+1]; j++) {
841       column_values[cnt++] = B->a[j];
842     }
843   }
844   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
845 
846   /* store the column values to the file */
847   if (rank == 0) {
848     MPI_Status status;
849     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
850     for (i=1; i<size; i++) {
851       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
852       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
853       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
855     }
856   } else {
857     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
858     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(column_values);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
866 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
867 {
868   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
869   int               ierr,rank = aij->rank,size = aij->size;
870   PetscTruth        isdraw,isascii,flg,isbinary;
871   PetscViewer       sviewer;
872   PetscViewerFormat format;
873 
874   PetscFunctionBegin;
875   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
876   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
878   if (isascii) {
879     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
880     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
881       MatInfo info;
882       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
883       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
884       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
885       if (flg) {
886         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
887 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
888       } else {
889         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
890 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
891       }
892       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
893       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
894       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
895       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
896       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
897       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
898       PetscFunctionReturn(0);
899     } else if (format == PETSC_VIEWER_ASCII_INFO) {
900       PetscFunctionReturn(0);
901     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
902       PetscFunctionReturn(0);
903     }
904   } else if (isbinary) {
905     if (size == 1) {
906       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
907       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
908     } else {
909       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
910     }
911     PetscFunctionReturn(0);
912   } else if (isdraw) {
913     PetscDraw  draw;
914     PetscTruth isnull;
915     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
916     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
917   }
918 
919   if (size == 1) {
920     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
921     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
922   } else {
923     /* assemble the entire matrix onto first processor. */
924     Mat         A;
925     Mat_SeqAIJ *Aloc;
926     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
927     PetscScalar *a;
928 
929     if (!rank) {
930       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
931     } else {
932       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
933     }
934     PetscLogObjectParent(mat,A);
935 
936     /* copy over the A part */
937     Aloc = (Mat_SeqAIJ*)aij->A->data;
938     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
939     row = aij->rstart;
940     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
941     for (i=0; i<m; i++) {
942       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
943       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
944     }
945     aj = Aloc->j;
946     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
947 
948     /* copy over the B part */
949     Aloc = (Mat_SeqAIJ*)aij->B->data;
950     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
951     row  = aij->rstart;
952     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
953     ct   = cols;
954     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
955     for (i=0; i<m; i++) {
956       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
957       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
958     }
959     ierr = PetscFree(ct);CHKERRQ(ierr);
960     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
961     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
962     /*
963        Everyone has to call to draw the matrix since the graphics waits are
964        synchronized across all processors that share the PetscDraw object
965     */
966     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
967     if (!rank) {
968       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
969       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
970     }
971     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
972     ierr = MatDestroy(A);CHKERRQ(ierr);
973   }
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatView_MPIAIJ"
979 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
980 {
981   int        ierr;
982   PetscTruth isascii,isdraw,issocket,isbinary;
983 
984   PetscFunctionBegin;
985   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
986   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
987   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
988   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
989   if (isascii || isdraw || isbinary || issocket) {
990     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
991   } else {
992     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
993   }
994   PetscFunctionReturn(0);
995 }
996 
997 
998 
999 #undef __FUNCT__
1000 #define __FUNCT__ "MatRelax_MPIAIJ"
1001 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1002 {
1003   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1004   int          ierr;
1005   Vec          bb1;
1006   PetscScalar  mone=-1.0;
1007 
1008   PetscFunctionBegin;
1009   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1010 
1011   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1012 
1013   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1014     if (flag & SOR_ZERO_INITIAL_GUESS) {
1015       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1016       its--;
1017     }
1018 
1019     while (its--) {
1020       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1021       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1022 
1023       /* update rhs: bb1 = bb - B*x */
1024       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1025       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1026 
1027       /* local sweep */
1028       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1029       CHKERRQ(ierr);
1030     }
1031   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1032     if (flag & SOR_ZERO_INITIAL_GUESS) {
1033       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1034       its--;
1035     }
1036     while (its--) {
1037       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1038       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1039 
1040       /* update rhs: bb1 = bb - B*x */
1041       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1042       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1043 
1044       /* local sweep */
1045       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1046       CHKERRQ(ierr);
1047     }
1048   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1049     if (flag & SOR_ZERO_INITIAL_GUESS) {
1050       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1051       its--;
1052     }
1053     while (its--) {
1054       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1055       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1056 
1057       /* update rhs: bb1 = bb - B*x */
1058       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1059       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1060 
1061       /* local sweep */
1062       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1063       CHKERRQ(ierr);
1064     }
1065   } else {
1066     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1067   }
1068 
1069   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1075 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1076 {
1077   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1078   Mat        A = mat->A,B = mat->B;
1079   int        ierr;
1080   PetscReal  isend[5],irecv[5];
1081 
1082   PetscFunctionBegin;
1083   info->block_size     = 1.0;
1084   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1085   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1086   isend[3] = info->memory;  isend[4] = info->mallocs;
1087   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1088   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1089   isend[3] += info->memory;  isend[4] += info->mallocs;
1090   if (flag == MAT_LOCAL) {
1091     info->nz_used      = isend[0];
1092     info->nz_allocated = isend[1];
1093     info->nz_unneeded  = isend[2];
1094     info->memory       = isend[3];
1095     info->mallocs      = isend[4];
1096   } else if (flag == MAT_GLOBAL_MAX) {
1097     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1098     info->nz_used      = irecv[0];
1099     info->nz_allocated = irecv[1];
1100     info->nz_unneeded  = irecv[2];
1101     info->memory       = irecv[3];
1102     info->mallocs      = irecv[4];
1103   } else if (flag == MAT_GLOBAL_SUM) {
1104     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1105     info->nz_used      = irecv[0];
1106     info->nz_allocated = irecv[1];
1107     info->nz_unneeded  = irecv[2];
1108     info->memory       = irecv[3];
1109     info->mallocs      = irecv[4];
1110   }
1111   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1112   info->fill_ratio_needed = 0;
1113   info->factor_mallocs    = 0;
1114   info->rows_global       = (double)matin->M;
1115   info->columns_global    = (double)matin->N;
1116   info->rows_local        = (double)matin->m;
1117   info->columns_local     = (double)matin->N;
1118 
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatSetOption_MPIAIJ"
1124 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1125 {
1126   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1127   int        ierr;
1128 
1129   PetscFunctionBegin;
1130   switch (op) {
1131   case MAT_NO_NEW_NONZERO_LOCATIONS:
1132   case MAT_YES_NEW_NONZERO_LOCATIONS:
1133   case MAT_COLUMNS_UNSORTED:
1134   case MAT_COLUMNS_SORTED:
1135   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1136   case MAT_KEEP_ZEROED_ROWS:
1137   case MAT_NEW_NONZERO_LOCATION_ERR:
1138   case MAT_USE_INODES:
1139   case MAT_DO_NOT_USE_INODES:
1140   case MAT_IGNORE_ZERO_ENTRIES:
1141     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1142     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1143     break;
1144   case MAT_ROW_ORIENTED:
1145     a->roworiented = PETSC_TRUE;
1146     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1147     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1148     break;
1149   case MAT_ROWS_SORTED:
1150   case MAT_ROWS_UNSORTED:
1151   case MAT_YES_NEW_DIAGONALS:
1152     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1153     break;
1154   case MAT_COLUMN_ORIENTED:
1155     a->roworiented = PETSC_FALSE;
1156     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1157     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1158     break;
1159   case MAT_IGNORE_OFF_PROC_ENTRIES:
1160     a->donotstash = PETSC_TRUE;
1161     break;
1162   case MAT_NO_NEW_DIAGONALS:
1163     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1164   default:
1165     SETERRQ(PETSC_ERR_SUP,"unknown option");
1166   }
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatGetRow_MPIAIJ"
1172 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1173 {
1174   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1175   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1176   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1177   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1178   int          *cmap,*idx_p;
1179 
1180   PetscFunctionBegin;
1181   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1182   mat->getrowactive = PETSC_TRUE;
1183 
1184   if (!mat->rowvalues && (idx || v)) {
1185     /*
1186         allocate enough space to hold information from the longest row.
1187     */
1188     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1189     int     max = 1,tmp;
1190     for (i=0; i<matin->m; i++) {
1191       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1192       if (max < tmp) { max = tmp; }
1193     }
1194     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1195     mat->rowindices = (int*)(mat->rowvalues + max);
1196   }
1197 
1198   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1199   lrow = row - rstart;
1200 
1201   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1202   if (!v)   {pvA = 0; pvB = 0;}
1203   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1204   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1205   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1206   nztot = nzA + nzB;
1207 
1208   cmap  = mat->garray;
1209   if (v  || idx) {
1210     if (nztot) {
1211       /* Sort by increasing column numbers, assuming A and B already sorted */
1212       int imark = -1;
1213       if (v) {
1214         *v = v_p = mat->rowvalues;
1215         for (i=0; i<nzB; i++) {
1216           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1217           else break;
1218         }
1219         imark = i;
1220         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1221         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1222       }
1223       if (idx) {
1224         *idx = idx_p = mat->rowindices;
1225         if (imark > -1) {
1226           for (i=0; i<imark; i++) {
1227             idx_p[i] = cmap[cworkB[i]];
1228           }
1229         } else {
1230           for (i=0; i<nzB; i++) {
1231             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1232             else break;
1233           }
1234           imark = i;
1235         }
1236         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1237         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1238       }
1239     } else {
1240       if (idx) *idx = 0;
1241       if (v)   *v   = 0;
1242     }
1243   }
1244   *nz = nztot;
1245   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1246   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1252 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1253 {
1254   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1255 
1256   PetscFunctionBegin;
1257   if (aij->getrowactive == PETSC_FALSE) {
1258     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1259   }
1260   aij->getrowactive = PETSC_FALSE;
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatNorm_MPIAIJ"
1266 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1267 {
1268   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1269   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1270   int          ierr,i,j,cstart = aij->cstart;
1271   PetscReal    sum = 0.0;
1272   PetscScalar  *v;
1273 
1274   PetscFunctionBegin;
1275   if (aij->size == 1) {
1276     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1277   } else {
1278     if (type == NORM_FROBENIUS) {
1279       v = amat->a;
1280       for (i=0; i<amat->nz; i++) {
1281 #if defined(PETSC_USE_COMPLEX)
1282         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1283 #else
1284         sum += (*v)*(*v); v++;
1285 #endif
1286       }
1287       v = bmat->a;
1288       for (i=0; i<bmat->nz; i++) {
1289 #if defined(PETSC_USE_COMPLEX)
1290         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1291 #else
1292         sum += (*v)*(*v); v++;
1293 #endif
1294       }
1295       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1296       *norm = sqrt(*norm);
1297     } else if (type == NORM_1) { /* max column norm */
1298       PetscReal *tmp,*tmp2;
1299       int    *jj,*garray = aij->garray;
1300       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1301       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1302       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1303       *norm = 0.0;
1304       v = amat->a; jj = amat->j;
1305       for (j=0; j<amat->nz; j++) {
1306         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1307       }
1308       v = bmat->a; jj = bmat->j;
1309       for (j=0; j<bmat->nz; j++) {
1310         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1311       }
1312       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1313       for (j=0; j<mat->N; j++) {
1314         if (tmp2[j] > *norm) *norm = tmp2[j];
1315       }
1316       ierr = PetscFree(tmp);CHKERRQ(ierr);
1317       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1318     } else if (type == NORM_INFINITY) { /* max row norm */
1319       PetscReal ntemp = 0.0;
1320       for (j=0; j<aij->A->m; j++) {
1321         v = amat->a + amat->i[j];
1322         sum = 0.0;
1323         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1324           sum += PetscAbsScalar(*v); v++;
1325         }
1326         v = bmat->a + bmat->i[j];
1327         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1328           sum += PetscAbsScalar(*v); v++;
1329         }
1330         if (sum > ntemp) ntemp = sum;
1331       }
1332       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1333     } else {
1334       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1335     }
1336   }
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatTranspose_MPIAIJ"
1342 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1343 {
1344   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1345   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1346   int          ierr;
1347   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1348   Mat          B;
1349   PetscScalar  *array;
1350 
1351   PetscFunctionBegin;
1352   if (!matout && M != N) {
1353     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1354   }
1355 
1356   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1357 
1358   /* copy over the A part */
1359   Aloc = (Mat_SeqAIJ*)a->A->data;
1360   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1361   row = a->rstart;
1362   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1363   for (i=0; i<m; i++) {
1364     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1365     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1366   }
1367   aj = Aloc->j;
1368   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1369 
1370   /* copy over the B part */
1371   Aloc = (Mat_SeqAIJ*)a->B->data;
1372   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1373   row  = a->rstart;
1374   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1375   ct   = cols;
1376   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1377   for (i=0; i<m; i++) {
1378     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1379     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1380   }
1381   ierr = PetscFree(ct);CHKERRQ(ierr);
1382   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1383   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1384   if (matout) {
1385     *matout = B;
1386   } else {
1387     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1394 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1395 {
1396   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1397   Mat        a = aij->A,b = aij->B;
1398   int        ierr,s1,s2,s3;
1399 
1400   PetscFunctionBegin;
1401   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1402   if (rr) {
1403     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1404     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1405     /* Overlap communication with computation. */
1406     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1407   }
1408   if (ll) {
1409     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1410     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1411     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1412   }
1413   /* scale  the diagonal block */
1414   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1415 
1416   if (rr) {
1417     /* Do a scatter end and then right scale the off-diagonal block */
1418     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1419     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1420   }
1421 
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1428 int MatPrintHelp_MPIAIJ(Mat A)
1429 {
1430   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1431   int        ierr;
1432 
1433   PetscFunctionBegin;
1434   if (!a->rank) {
1435     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1436   }
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1442 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1443 {
1444   PetscFunctionBegin;
1445   *bs = 1;
1446   PetscFunctionReturn(0);
1447 }
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1450 int MatSetUnfactored_MPIAIJ(Mat A)
1451 {
1452   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1453   int        ierr;
1454 
1455   PetscFunctionBegin;
1456   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatEqual_MPIAIJ"
1462 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1463 {
1464   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1465   Mat        a,b,c,d;
1466   PetscTruth flg;
1467   int        ierr;
1468 
1469   PetscFunctionBegin;
1470   a = matA->A; b = matA->B;
1471   c = matB->A; d = matB->B;
1472 
1473   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1474   if (flg == PETSC_TRUE) {
1475     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1476   }
1477   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatCopy_MPIAIJ"
1483 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1484 {
1485   int        ierr;
1486   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1487   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1488 
1489   PetscFunctionBegin;
1490   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1491   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1492     /* because of the column compression in the off-processor part of the matrix a->B,
1493        the number of columns in a->B and b->B may be different, hence we cannot call
1494        the MatCopy() directly on the two parts. If need be, we can provide a more
1495        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1496        then copying the submatrices */
1497     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1498   } else {
1499     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1500     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1501   }
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1507 int MatSetUpPreallocation_MPIAIJ(Mat A)
1508 {
1509   int        ierr;
1510 
1511   PetscFunctionBegin;
1512   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #include "petscblaslapack.h"
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatAXPY_MPIAIJ"
1519 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1520 {
1521   int        ierr,one=1,i;
1522   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1523   Mat_SeqAIJ *x,*y;
1524 
1525   PetscFunctionBegin;
1526   if (str == SAME_NONZERO_PATTERN) {
1527     x = (Mat_SeqAIJ *)xx->A->data;
1528     y = (Mat_SeqAIJ *)yy->A->data;
1529     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1530     x = (Mat_SeqAIJ *)xx->B->data;
1531     y = (Mat_SeqAIJ *)yy->B->data;
1532     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1533   } else if (str == SUBSET_NONZERO_PATTERN) {
1534     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1535 
1536     x = (Mat_SeqAIJ *)xx->B->data;
1537     y = (Mat_SeqAIJ *)yy->B->data;
1538     if (y->xtoy && y->XtoY != xx->B) {
1539       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1540       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1541     }
1542     if (!y->xtoy) { /* get xtoy */
1543       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1544       y->XtoY = xx->B;
1545     }
1546     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1547   } else {
1548     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1549   }
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 /* -------------------------------------------------------------------*/
1554 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1555        MatGetRow_MPIAIJ,
1556        MatRestoreRow_MPIAIJ,
1557        MatMult_MPIAIJ,
1558 /* 4*/ MatMultAdd_MPIAIJ,
1559        MatMultTranspose_MPIAIJ,
1560        MatMultTransposeAdd_MPIAIJ,
1561        0,
1562        0,
1563        0,
1564 /*10*/ 0,
1565        0,
1566        0,
1567        MatRelax_MPIAIJ,
1568        MatTranspose_MPIAIJ,
1569 /*15*/ MatGetInfo_MPIAIJ,
1570        MatEqual_MPIAIJ,
1571        MatGetDiagonal_MPIAIJ,
1572        MatDiagonalScale_MPIAIJ,
1573        MatNorm_MPIAIJ,
1574 /*20*/ MatAssemblyBegin_MPIAIJ,
1575        MatAssemblyEnd_MPIAIJ,
1576        0,
1577        MatSetOption_MPIAIJ,
1578        MatZeroEntries_MPIAIJ,
1579 /*25*/ MatZeroRows_MPIAIJ,
1580 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1581        MatLUFactorSymbolic_MPIAIJ_TFS,
1582 #else
1583        0,
1584 #endif
1585        0,
1586        0,
1587        0,
1588 /*30*/ MatSetUpPreallocation_MPIAIJ,
1589        0,
1590        0,
1591        0,
1592        0,
1593 /*35*/ MatDuplicate_MPIAIJ,
1594        0,
1595        0,
1596        0,
1597        0,
1598 /*40*/ MatAXPY_MPIAIJ,
1599        MatGetSubMatrices_MPIAIJ,
1600        MatIncreaseOverlap_MPIAIJ,
1601        MatGetValues_MPIAIJ,
1602        MatCopy_MPIAIJ,
1603 /*45*/ MatPrintHelp_MPIAIJ,
1604        MatScale_MPIAIJ,
1605        0,
1606        0,
1607        0,
1608 /*50*/ MatGetBlockSize_MPIAIJ,
1609        0,
1610        0,
1611        0,
1612        0,
1613 /*55*/ MatFDColoringCreate_MPIAIJ,
1614        0,
1615        MatSetUnfactored_MPIAIJ,
1616        0,
1617        0,
1618 /*60*/ MatGetSubMatrix_MPIAIJ,
1619        MatDestroy_MPIAIJ,
1620        MatView_MPIAIJ,
1621        MatGetPetscMaps_Petsc,
1622        0,
1623 /*65*/ 0,
1624        0,
1625        0,
1626        0,
1627        0,
1628 /*70*/ 0,
1629        0,
1630        MatSetColoring_MPIAIJ,
1631        MatSetValuesAdic_MPIAIJ,
1632        MatSetValuesAdifor_MPIAIJ,
1633 /*75*/ 0,
1634        0,
1635        0,
1636        0,
1637        0,
1638 /*80*/ 0,
1639        0,
1640        0,
1641        0,
1642        0,
1643 /*85*/ MatLoad_MPIAIJ};
1644 
1645 /* ----------------------------------------------------------------------------------------*/
1646 
1647 EXTERN_C_BEGIN
1648 #undef __FUNCT__
1649 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1650 int MatStoreValues_MPIAIJ(Mat mat)
1651 {
1652   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1653   int        ierr;
1654 
1655   PetscFunctionBegin;
1656   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1657   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1658   PetscFunctionReturn(0);
1659 }
1660 EXTERN_C_END
1661 
1662 EXTERN_C_BEGIN
1663 #undef __FUNCT__
1664 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1665 int MatRetrieveValues_MPIAIJ(Mat mat)
1666 {
1667   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1668   int        ierr;
1669 
1670   PetscFunctionBegin;
1671   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1672   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 EXTERN_C_END
1676 
1677 #include "petscpc.h"
1678 EXTERN_C_BEGIN
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1681 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1682 {
1683   Mat_MPIAIJ   *b;
1684   int          ierr,i;
1685 
1686   PetscFunctionBegin;
1687   B->preallocated = PETSC_TRUE;
1688   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1689   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1690   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1691   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1692   if (d_nnz) {
1693     for (i=0; i<B->m; i++) {
1694       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1695     }
1696   }
1697   if (o_nnz) {
1698     for (i=0; i<B->m; i++) {
1699       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1700     }
1701   }
1702   b = (Mat_MPIAIJ*)B->data;
1703 
1704   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1705   PetscLogObjectParent(B,b->A);
1706   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1707   PetscLogObjectParent(B,b->B);
1708 
1709   PetscFunctionReturn(0);
1710 }
1711 EXTERN_C_END
1712 
1713 /*MC
1714    MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1715 
1716    Options Database Keys:
1717 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1718 
1719   Level: beginner
1720 
1721 .seealso: MatCreateMPIAIJ
1722 M*/
1723 
1724 EXTERN_C_BEGIN
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatCreate_MPIAIJ"
1727 int MatCreate_MPIAIJ(Mat B)
1728 {
1729   Mat_MPIAIJ *b;
1730   int        ierr,i,size;
1731 
1732   PetscFunctionBegin;
1733   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1734 
1735   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1736   B->data         = (void*)b;
1737   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1738   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1739   B->factor       = 0;
1740   B->assembled    = PETSC_FALSE;
1741   B->mapping      = 0;
1742 
1743   B->insertmode      = NOT_SET_VALUES;
1744   b->size            = size;
1745   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1746 
1747   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1748   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1749 
1750   /* the information in the maps duplicates the information computed below, eventually
1751      we should remove the duplicate information that is not contained in the maps */
1752   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1753   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1754 
1755   /* build local table of row and column ownerships */
1756   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1757   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1758   b->cowners = b->rowners + b->size + 2;
1759   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1760   b->rowners[0] = 0;
1761   for (i=2; i<=b->size; i++) {
1762     b->rowners[i] += b->rowners[i-1];
1763   }
1764   b->rstart = b->rowners[b->rank];
1765   b->rend   = b->rowners[b->rank+1];
1766   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1767   b->cowners[0] = 0;
1768   for (i=2; i<=b->size; i++) {
1769     b->cowners[i] += b->cowners[i-1];
1770   }
1771   b->cstart = b->cowners[b->rank];
1772   b->cend   = b->cowners[b->rank+1];
1773 
1774   /* build cache for off array entries formed */
1775   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1776   b->donotstash  = PETSC_FALSE;
1777   b->colmap      = 0;
1778   b->garray      = 0;
1779   b->roworiented = PETSC_TRUE;
1780 
1781   /* stuff used for matrix vector multiply */
1782   b->lvec      = PETSC_NULL;
1783   b->Mvctx     = PETSC_NULL;
1784 
1785   /* stuff for MatGetRow() */
1786   b->rowindices   = 0;
1787   b->rowvalues    = 0;
1788   b->getrowactive = PETSC_FALSE;
1789 
1790   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1791                                      "MatStoreValues_MPIAIJ",
1792                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1793   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1794                                      "MatRetrieveValues_MPIAIJ",
1795                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1796   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1797 				     "MatGetDiagonalBlock_MPIAIJ",
1798                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1799   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1800 				     "MatIsSymmetric_MPIAIJ",
1801 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1802   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1803 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1804 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1805   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1806 				     "MatDiagonalScaleLocal_MPIAIJ",
1807 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 EXTERN_C_END
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1814 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1815 {
1816   Mat        mat;
1817   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1818   int        ierr;
1819 
1820   PetscFunctionBegin;
1821   *newmat       = 0;
1822   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1823   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1824   a    = (Mat_MPIAIJ*)mat->data;
1825   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1826   mat->factor       = matin->factor;
1827   mat->assembled    = PETSC_TRUE;
1828   mat->insertmode   = NOT_SET_VALUES;
1829   mat->preallocated = PETSC_TRUE;
1830 
1831   a->rstart       = oldmat->rstart;
1832   a->rend         = oldmat->rend;
1833   a->cstart       = oldmat->cstart;
1834   a->cend         = oldmat->cend;
1835   a->size         = oldmat->size;
1836   a->rank         = oldmat->rank;
1837   a->donotstash   = oldmat->donotstash;
1838   a->roworiented  = oldmat->roworiented;
1839   a->rowindices   = 0;
1840   a->rowvalues    = 0;
1841   a->getrowactive = PETSC_FALSE;
1842 
1843   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1844   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1845   if (oldmat->colmap) {
1846 #if defined (PETSC_USE_CTABLE)
1847     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1848 #else
1849     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1850     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1851     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1852 #endif
1853   } else a->colmap = 0;
1854   if (oldmat->garray) {
1855     int len;
1856     len  = oldmat->B->n;
1857     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1858     PetscLogObjectMemory(mat,len*sizeof(int));
1859     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1860   } else a->garray = 0;
1861 
1862   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1863   PetscLogObjectParent(mat,a->lvec);
1864   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1865   PetscLogObjectParent(mat,a->Mvctx);
1866   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1867   PetscLogObjectParent(mat,a->A);
1868   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1869   PetscLogObjectParent(mat,a->B);
1870   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1871   *newmat = mat;
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #include "petscsys.h"
1876 
1877 EXTERN_C_BEGIN
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatLoad_MPIAIJ"
1880 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1881 {
1882   Mat          A;
1883   PetscScalar  *vals,*svals;
1884   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1885   MPI_Status   status;
1886   int          i,nz,ierr,j,rstart,rend,fd;
1887   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1888   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1889   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1890 
1891   PetscFunctionBegin;
1892   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1893   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1894   if (!rank) {
1895     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1896     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1897     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1898     if (header[3] < 0) {
1899       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1900     }
1901   }
1902 
1903   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1904   M = header[1]; N = header[2];
1905   /* determine ownership of all rows */
1906   m = M/size + ((M % size) > rank);
1907   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1908   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1909   rowners[0] = 0;
1910   for (i=2; i<=size; i++) {
1911     rowners[i] += rowners[i-1];
1912   }
1913   rstart = rowners[rank];
1914   rend   = rowners[rank+1];
1915 
1916   /* distribute row lengths to all processors */
1917   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1918   offlens = ourlens + (rend-rstart);
1919   if (!rank) {
1920     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1921     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1922     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1923     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1924     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1925     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1926   } else {
1927     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1928   }
1929 
1930   if (!rank) {
1931     /* calculate the number of nonzeros on each processor */
1932     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1933     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1934     for (i=0; i<size; i++) {
1935       for (j=rowners[i]; j< rowners[i+1]; j++) {
1936         procsnz[i] += rowlengths[j];
1937       }
1938     }
1939     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1940 
1941     /* determine max buffer needed and allocate it */
1942     maxnz = 0;
1943     for (i=0; i<size; i++) {
1944       maxnz = PetscMax(maxnz,procsnz[i]);
1945     }
1946     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1947 
1948     /* read in my part of the matrix column indices  */
1949     nz   = procsnz[0];
1950     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1951     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1952 
1953     /* read in every one elses and ship off */
1954     for (i=1; i<size; i++) {
1955       nz   = procsnz[i];
1956       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1957       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1958     }
1959     ierr = PetscFree(cols);CHKERRQ(ierr);
1960   } else {
1961     /* determine buffer space needed for message */
1962     nz = 0;
1963     for (i=0; i<m; i++) {
1964       nz += ourlens[i];
1965     }
1966     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1967 
1968     /* receive message of column indices*/
1969     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1970     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1971     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1972   }
1973 
1974   /* determine column ownership if matrix is not square */
1975   if (N != M) {
1976     n      = N/size + ((N % size) > rank);
1977     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1978     cstart = cend - n;
1979   } else {
1980     cstart = rstart;
1981     cend   = rend;
1982     n      = cend - cstart;
1983   }
1984 
1985   /* loop over local rows, determining number of off diagonal entries */
1986   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1987   jj = 0;
1988   for (i=0; i<m; i++) {
1989     for (j=0; j<ourlens[i]; j++) {
1990       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1991       jj++;
1992     }
1993   }
1994 
1995   /* create our matrix */
1996   for (i=0; i<m; i++) {
1997     ourlens[i] -= offlens[i];
1998   }
1999   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2000   ierr = MatSetType(A,type);CHKERRQ(ierr);
2001   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2002 
2003   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2004   for (i=0; i<m; i++) {
2005     ourlens[i] += offlens[i];
2006   }
2007 
2008   if (!rank) {
2009     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2010 
2011     /* read in my part of the matrix numerical values  */
2012     nz   = procsnz[0];
2013     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2014 
2015     /* insert into matrix */
2016     jj      = rstart;
2017     smycols = mycols;
2018     svals   = vals;
2019     for (i=0; i<m; i++) {
2020       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2021       smycols += ourlens[i];
2022       svals   += ourlens[i];
2023       jj++;
2024     }
2025 
2026     /* read in other processors and ship out */
2027     for (i=1; i<size; i++) {
2028       nz   = procsnz[i];
2029       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2030       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2031     }
2032     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2033   } else {
2034     /* receive numeric values */
2035     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2036 
2037     /* receive message of values*/
2038     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2039     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2040     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2041 
2042     /* insert into matrix */
2043     jj      = rstart;
2044     smycols = mycols;
2045     svals   = vals;
2046     for (i=0; i<m; i++) {
2047       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2048       smycols += ourlens[i];
2049       svals   += ourlens[i];
2050       jj++;
2051     }
2052   }
2053   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2054   ierr = PetscFree(vals);CHKERRQ(ierr);
2055   ierr = PetscFree(mycols);CHKERRQ(ierr);
2056   ierr = PetscFree(rowners);CHKERRQ(ierr);
2057 
2058   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2059   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2060   *newmat = A;
2061   PetscFunctionReturn(0);
2062 }
2063 EXTERN_C_END
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2067 /*
2068     Not great since it makes two copies of the submatrix, first an SeqAIJ
2069   in local and then by concatenating the local matrices the end result.
2070   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2071 */
2072 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2073 {
2074   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2075   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2076   Mat          *local,M,Mreuse;
2077   PetscScalar  *vwork,*aa;
2078   MPI_Comm     comm = mat->comm;
2079   Mat_SeqAIJ   *aij;
2080 
2081 
2082   PetscFunctionBegin;
2083   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2084   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2085 
2086   if (call ==  MAT_REUSE_MATRIX) {
2087     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2088     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2089     local = &Mreuse;
2090     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2091   } else {
2092     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2093     Mreuse = *local;
2094     ierr   = PetscFree(local);CHKERRQ(ierr);
2095   }
2096 
2097   /*
2098       m - number of local rows
2099       n - number of columns (same on all processors)
2100       rstart - first row in new global matrix generated
2101   */
2102   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2103   if (call == MAT_INITIAL_MATRIX) {
2104     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2105     ii  = aij->i;
2106     jj  = aij->j;
2107 
2108     /*
2109         Determine the number of non-zeros in the diagonal and off-diagonal
2110         portions of the matrix in order to do correct preallocation
2111     */
2112 
2113     /* first get start and end of "diagonal" columns */
2114     if (csize == PETSC_DECIDE) {
2115       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2116       if (mglobal == n) { /* square matrix */
2117 	nlocal = m;
2118       } else {
2119         nlocal = n/size + ((n % size) > rank);
2120       }
2121     } else {
2122       nlocal = csize;
2123     }
2124     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2125     rstart = rend - nlocal;
2126     if (rank == size - 1 && rend != n) {
2127       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2128     }
2129 
2130     /* next, compute all the lengths */
2131     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2132     olens = dlens + m;
2133     for (i=0; i<m; i++) {
2134       jend = ii[i+1] - ii[i];
2135       olen = 0;
2136       dlen = 0;
2137       for (j=0; j<jend; j++) {
2138         if (*jj < rstart || *jj >= rend) olen++;
2139         else dlen++;
2140         jj++;
2141       }
2142       olens[i] = olen;
2143       dlens[i] = dlen;
2144     }
2145     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2146     ierr = PetscFree(dlens);CHKERRQ(ierr);
2147   } else {
2148     int ml,nl;
2149 
2150     M = *newmat;
2151     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2152     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2153     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2154     /*
2155          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2156        rather than the slower MatSetValues().
2157     */
2158     M->was_assembled = PETSC_TRUE;
2159     M->assembled     = PETSC_FALSE;
2160   }
2161   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2162   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2163   ii  = aij->i;
2164   jj  = aij->j;
2165   aa  = aij->a;
2166   for (i=0; i<m; i++) {
2167     row   = rstart + i;
2168     nz    = ii[i+1] - ii[i];
2169     cwork = jj;     jj += nz;
2170     vwork = aa;     aa += nz;
2171     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2172   }
2173 
2174   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2175   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2176   *newmat = M;
2177 
2178   /* save submatrix used in processor for next request */
2179   if (call ==  MAT_INITIAL_MATRIX) {
2180     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2181     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2182   }
2183 
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2189 /*@C
2190    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2191    (the default parallel PETSc format).  For good matrix assembly performance
2192    the user should preallocate the matrix storage by setting the parameters
2193    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2194    performance can be increased by more than a factor of 50.
2195 
2196    Collective on MPI_Comm
2197 
2198    Input Parameters:
2199 +  A - the matrix
2200 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2201            (same value is used for all local rows)
2202 .  d_nnz - array containing the number of nonzeros in the various rows of the
2203            DIAGONAL portion of the local submatrix (possibly different for each row)
2204            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2205            The size of this array is equal to the number of local rows, i.e 'm'.
2206            You must leave room for the diagonal entry even if it is zero.
2207 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2208            submatrix (same value is used for all local rows).
2209 -  o_nnz - array containing the number of nonzeros in the various rows of the
2210            OFF-DIAGONAL portion of the local submatrix (possibly different for
2211            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2212            structure. The size of this array is equal to the number
2213            of local rows, i.e 'm'.
2214 
2215    The AIJ format (also called the Yale sparse matrix format or
2216    compressed row storage), is fully compatible with standard Fortran 77
2217    storage.  That is, the stored row and column indices can begin at
2218    either one (as in Fortran) or zero.  See the users manual for details.
2219 
2220    The user MUST specify either the local or global matrix dimensions
2221    (possibly both).
2222 
2223    The parallel matrix is partitioned such that the first m0 rows belong to
2224    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2225    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2226 
2227    The DIAGONAL portion of the local submatrix of a processor can be defined
2228    as the submatrix which is obtained by extraction the part corresponding
2229    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2230    first row that belongs to the processor, and r2 is the last row belonging
2231    to the this processor. This is a square mxm matrix. The remaining portion
2232    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2233 
2234    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2235 
2236    By default, this format uses inodes (identical nodes) when possible.
2237    We search for consecutive rows with the same nonzero structure, thereby
2238    reusing matrix information to achieve increased efficiency.
2239 
2240    Options Database Keys:
2241 +  -mat_aij_no_inode  - Do not use inodes
2242 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2243 -  -mat_aij_oneindex - Internally use indexing starting at 1
2244         rather than 0.  Note that when calling MatSetValues(),
2245         the user still MUST index entries starting at 0!
2246 
2247    Example usage:
2248 
2249    Consider the following 8x8 matrix with 34 non-zero values, that is
2250    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2251    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2252    as follows:
2253 
2254 .vb
2255             1  2  0  |  0  3  0  |  0  4
2256     Proc0   0  5  6  |  7  0  0  |  8  0
2257             9  0 10  | 11  0  0  | 12  0
2258     -------------------------------------
2259            13  0 14  | 15 16 17  |  0  0
2260     Proc1   0 18  0  | 19 20 21  |  0  0
2261             0  0  0  | 22 23  0  | 24  0
2262     -------------------------------------
2263     Proc2  25 26 27  |  0  0 28  | 29  0
2264            30  0  0  | 31 32 33  |  0 34
2265 .ve
2266 
2267    This can be represented as a collection of submatrices as:
2268 
2269 .vb
2270       A B C
2271       D E F
2272       G H I
2273 .ve
2274 
2275    Where the submatrices A,B,C are owned by proc0, D,E,F are
2276    owned by proc1, G,H,I are owned by proc2.
2277 
2278    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2279    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2280    The 'M','N' parameters are 8,8, and have the same values on all procs.
2281 
2282    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2283    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2284    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2285    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2286    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2287    matrix, ans [DF] as another SeqAIJ matrix.
2288 
2289    When d_nz, o_nz parameters are specified, d_nz storage elements are
2290    allocated for every row of the local diagonal submatrix, and o_nz
2291    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2292    One way to choose d_nz and o_nz is to use the max nonzerors per local
2293    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2294    In this case, the values of d_nz,o_nz are:
2295 .vb
2296      proc0 : dnz = 2, o_nz = 2
2297      proc1 : dnz = 3, o_nz = 2
2298      proc2 : dnz = 1, o_nz = 4
2299 .ve
2300    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2301    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2302    for proc3. i.e we are using 12+15+10=37 storage locations to store
2303    34 values.
2304 
2305    When d_nnz, o_nnz parameters are specified, the storage is specified
2306    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2307    In the above case the values for d_nnz,o_nnz are:
2308 .vb
2309      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2310      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2311      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2312 .ve
2313    Here the space allocated is sum of all the above values i.e 34, and
2314    hence pre-allocation is perfect.
2315 
2316    Level: intermediate
2317 
2318 .keywords: matrix, aij, compressed row, sparse, parallel
2319 
2320 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2321 @*/
2322 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2323 {
2324   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2325 
2326   PetscFunctionBegin;
2327   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2328   if (f) {
2329     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2330   }
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 #undef __FUNCT__
2335 #define __FUNCT__ "MatCreateMPIAIJ"
2336 /*@C
2337    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2338    (the default parallel PETSc format).  For good matrix assembly performance
2339    the user should preallocate the matrix storage by setting the parameters
2340    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2341    performance can be increased by more than a factor of 50.
2342 
2343    Collective on MPI_Comm
2344 
2345    Input Parameters:
2346 +  comm - MPI communicator
2347 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2348            This value should be the same as the local size used in creating the
2349            y vector for the matrix-vector product y = Ax.
2350 .  n - This value should be the same as the local size used in creating the
2351        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2352        calculated if N is given) For square matrices n is almost always m.
2353 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2354 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2355 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2356            (same value is used for all local rows)
2357 .  d_nnz - array containing the number of nonzeros in the various rows of the
2358            DIAGONAL portion of the local submatrix (possibly different for each row)
2359            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2360            The size of this array is equal to the number of local rows, i.e 'm'.
2361            You must leave room for the diagonal entry even if it is zero.
2362 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2363            submatrix (same value is used for all local rows).
2364 -  o_nnz - array containing the number of nonzeros in the various rows of the
2365            OFF-DIAGONAL portion of the local submatrix (possibly different for
2366            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2367            structure. The size of this array is equal to the number
2368            of local rows, i.e 'm'.
2369 
2370    Output Parameter:
2371 .  A - the matrix
2372 
2373    Notes:
2374    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2375    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2376    storage requirements for this matrix.
2377 
2378    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2379    processor than it must be used on all processors that share the object for
2380    that argument.
2381 
2382    The AIJ format (also called the Yale sparse matrix format or
2383    compressed row storage), is fully compatible with standard Fortran 77
2384    storage.  That is, the stored row and column indices can begin at
2385    either one (as in Fortran) or zero.  See the users manual for details.
2386 
2387    The user MUST specify either the local or global matrix dimensions
2388    (possibly both).
2389 
2390    The parallel matrix is partitioned such that the first m0 rows belong to
2391    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2392    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2393 
2394    The DIAGONAL portion of the local submatrix of a processor can be defined
2395    as the submatrix which is obtained by extraction the part corresponding
2396    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2397    first row that belongs to the processor, and r2 is the last row belonging
2398    to the this processor. This is a square mxm matrix. The remaining portion
2399    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2400 
2401    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2402 
2403    When calling this routine with a single process communicator, a matrix of
2404    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2405    type of communicator, use the construction mechanism:
2406      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2407 
2408    By default, this format uses inodes (identical nodes) when possible.
2409    We search for consecutive rows with the same nonzero structure, thereby
2410    reusing matrix information to achieve increased efficiency.
2411 
2412    Options Database Keys:
2413 +  -mat_aij_no_inode  - Do not use inodes
2414 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2415 -  -mat_aij_oneindex - Internally use indexing starting at 1
2416         rather than 0.  Note that when calling MatSetValues(),
2417         the user still MUST index entries starting at 0!
2418 
2419 
2420    Example usage:
2421 
2422    Consider the following 8x8 matrix with 34 non-zero values, that is
2423    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2424    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2425    as follows:
2426 
2427 .vb
2428             1  2  0  |  0  3  0  |  0  4
2429     Proc0   0  5  6  |  7  0  0  |  8  0
2430             9  0 10  | 11  0  0  | 12  0
2431     -------------------------------------
2432            13  0 14  | 15 16 17  |  0  0
2433     Proc1   0 18  0  | 19 20 21  |  0  0
2434             0  0  0  | 22 23  0  | 24  0
2435     -------------------------------------
2436     Proc2  25 26 27  |  0  0 28  | 29  0
2437            30  0  0  | 31 32 33  |  0 34
2438 .ve
2439 
2440    This can be represented as a collection of submatrices as:
2441 
2442 .vb
2443       A B C
2444       D E F
2445       G H I
2446 .ve
2447 
2448    Where the submatrices A,B,C are owned by proc0, D,E,F are
2449    owned by proc1, G,H,I are owned by proc2.
2450 
2451    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2452    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2453    The 'M','N' parameters are 8,8, and have the same values on all procs.
2454 
2455    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2456    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2457    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2458    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2459    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2460    matrix, ans [DF] as another SeqAIJ matrix.
2461 
2462    When d_nz, o_nz parameters are specified, d_nz storage elements are
2463    allocated for every row of the local diagonal submatrix, and o_nz
2464    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2465    One way to choose d_nz and o_nz is to use the max nonzerors per local
2466    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2467    In this case, the values of d_nz,o_nz are:
2468 .vb
2469      proc0 : dnz = 2, o_nz = 2
2470      proc1 : dnz = 3, o_nz = 2
2471      proc2 : dnz = 1, o_nz = 4
2472 .ve
2473    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2474    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2475    for proc3. i.e we are using 12+15+10=37 storage locations to store
2476    34 values.
2477 
2478    When d_nnz, o_nnz parameters are specified, the storage is specified
2479    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2480    In the above case the values for d_nnz,o_nnz are:
2481 .vb
2482      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2483      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2484      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2485 .ve
2486    Here the space allocated is sum of all the above values i.e 34, and
2487    hence pre-allocation is perfect.
2488 
2489    Level: intermediate
2490 
2491 .keywords: matrix, aij, compressed row, sparse, parallel
2492 
2493 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2494 @*/
2495 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2496 {
2497   int ierr,size;
2498 
2499   PetscFunctionBegin;
2500   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2501   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2502   if (size > 1) {
2503     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2504     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2505   } else {
2506     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2507     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2508   }
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2514 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2515 {
2516   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2517   PetscFunctionBegin;
2518   *Ad     = a->A;
2519   *Ao     = a->B;
2520   *colmap = a->garray;
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2526 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2527 {
2528   int        ierr,i;
2529   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2530 
2531   PetscFunctionBegin;
2532   if (coloring->ctype == IS_COLORING_LOCAL) {
2533     ISColoringValue *allcolors,*colors;
2534     ISColoring      ocoloring;
2535 
2536     /* set coloring for diagonal portion */
2537     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2538 
2539     /* set coloring for off-diagonal portion */
2540     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2541     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2542     for (i=0; i<a->B->n; i++) {
2543       colors[i] = allcolors[a->garray[i]];
2544     }
2545     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2546     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2547     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2548     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2549   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2550     ISColoringValue *colors;
2551     int             *larray;
2552     ISColoring      ocoloring;
2553 
2554     /* set coloring for diagonal portion */
2555     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2556     for (i=0; i<a->A->n; i++) {
2557       larray[i] = i + a->cstart;
2558     }
2559     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2560     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2561     for (i=0; i<a->A->n; i++) {
2562       colors[i] = coloring->colors[larray[i]];
2563     }
2564     ierr = PetscFree(larray);CHKERRQ(ierr);
2565     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2566     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2567     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2568 
2569     /* set coloring for off-diagonal portion */
2570     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2571     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2572     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2573     for (i=0; i<a->B->n; i++) {
2574       colors[i] = coloring->colors[larray[i]];
2575     }
2576     ierr = PetscFree(larray);CHKERRQ(ierr);
2577     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2578     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2579     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2580   } else {
2581     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2582   }
2583 
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2589 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2590 {
2591   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2592   int        ierr;
2593 
2594   PetscFunctionBegin;
2595   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2596   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 #undef __FUNCT__
2601 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2602 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2603 {
2604   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2605   int        ierr;
2606 
2607   PetscFunctionBegin;
2608   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2609   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatMerge"
2615 /*@C
2616       MatMerge - Creates a single large PETSc matrix by concatinating sequential
2617                  matrices from each processor
2618 
2619     Collective on MPI_Comm
2620 
2621    Input Parameters:
2622 +    comm - the communicators the parallel matrix will live on
2623 -    inmat - the input sequential matrices
2624 
2625    Output Parameter:
2626 .    outmat - the parallel matrix generated
2627 
2628     Level: advanced
2629 
2630    Notes: The number of columns of the matrix in EACH of the seperate files
2631       MUST be the same.
2632 
2633 @*/
2634 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2635 {
2636   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2637   PetscScalar *values;
2638   PetscMap    columnmap,rowmap;
2639 
2640   PetscFunctionBegin;
2641 
2642   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2643 
2644   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2645   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2646   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2647   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2648   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2649   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2650 
2651   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2652   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2653   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2654   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2655   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2656 
2657   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2658   for (i=0;i<m;i++) {
2659     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2660     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2661     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2662   }
2663   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2664   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2665 
2666   for (i=0;i<m;i++) {
2667     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2668     I    = i + rstart;
2669     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2670     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2671   }
2672   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2673   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2674   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2675 
2676   PetscFunctionReturn(0);
2677 }
2678 
2679 #undef __FUNCT__
2680 #define __FUNCT__ "MatFileSplit"
2681 int MatFileSplit(Mat A,char *outfile)
2682 {
2683   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2684   PetscViewer out;
2685   char        *name;
2686   Mat         B;
2687   PetscScalar *values;
2688 
2689   PetscFunctionBegin;
2690 
2691   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2692   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2693   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2694   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2695   for (i=0;i<m;i++) {
2696     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2697     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2698     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2699   }
2700   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2701   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2702 
2703   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2704   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2705   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2706   sprintf(name,"%s.%d",outfile,rank);
2707   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2708   ierr = PetscFree(name);
2709   ierr = MatView(B,out);CHKERRQ(ierr);
2710   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2711   ierr = MatDestroy(B);CHKERRQ(ierr);
2712   PetscFunctionReturn(0);
2713 }
2714