xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2aa32adbb5e8a0c7c8984d1934cdfb31775e9c6d)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 /*
8   Local utility routine that creates a mapping from the global column
9 number to the local number in the off-diagonal part of the local
10 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
11 a slightly higher hash table cost; without it it is not scalable (each processor
12 has an order N integer array but is fast to acess.
13 */
14 #undef __FUNCT__
15 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
19   int        n = aij->B->n,i,ierr;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
29   PetscLogObjectMemory(mat,mat->N*sizeof(int));
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 #define CHUNKSIZE   15
37 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
38 { \
39  \
40     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
41     rmax = aimax[row]; nrow = ailen[row];  \
42     col1 = col - shift; \
43      \
44     low = 0; high = nrow; \
45     while (high-low > 5) { \
46       t = (low+high)/2; \
47       if (rp[t] > col) high = t; \
48       else             low  = t; \
49     } \
50       for (_i=low; _i<high; _i++) { \
51         if (rp[_i] > col1) break; \
52         if (rp[_i] == col1) { \
53           if (addv == ADD_VALUES) ap[_i] += value;   \
54           else                  ap[_i] = value; \
55           goto a_noinsert; \
56         } \
57       }  \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (int*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(int));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                  ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (nonew == 1) goto b_noinsert; \
133       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
134       if (nrow >= rmax) { \
135         /* there is no extra room in row, therefore enlarge */ \
136         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
137         PetscScalar *new_a; \
138  \
139         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
140  \
141         /* malloc new storage space */ \
142         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
143         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
144         new_j   = (int*)(new_a + new_nz); \
145         new_i   = new_j + new_nz; \
146  \
147         /* copy over old data into new slots */ \
148         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
149         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
150         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
151         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
152         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
153                                                            len*sizeof(int));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
156                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
157         /* free up old matrix storage */ \
158  \
159         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
160         if (!b->singlemalloc) { \
161           ierr = PetscFree(b->i);CHKERRQ(ierr); \
162           ierr = PetscFree(b->j);CHKERRQ(ierr); \
163         } \
164         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
165         b->singlemalloc = PETSC_TRUE; \
166  \
167         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
168         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
169         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
170         b->maxnz += CHUNKSIZE; \
171         b->reallocs++; \
172       } \
173       N = nrow++ - 1; b->nz++; \
174       /* shift up all the later entries in this row */ \
175       for (ii=N; ii>=_i; ii--) { \
176         rp[ii+1] = rp[ii]; \
177         ap[ii+1] = ap[ii]; \
178       } \
179       rp[_i] = col1;  \
180       ap[_i] = value;  \
181       b_noinsert: ; \
182       bilen[row] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIAIJ"
187 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
190   PetscScalar  value;
191   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some Variables required in the macro */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
277   int        cstart = aij->cstart,cend = aij->cend,row,col;
278 
279   PetscFunctionBegin;
280   for (i=0; i<m; i++) {
281     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
282     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
283     if (idxm[i] >= rstart && idxm[i] < rend) {
284       row = idxm[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
287         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
288         if (idxn[j] >= cstart && idxn[j] < cend){
289           col = idxn[j] - cstart;
290           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
291         } else {
292           if (!aij->colmap) {
293             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
294           }
295 #if defined (PETSC_USE_CTABLE)
296           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
297           col --;
298 #else
299           col = aij->colmap[idxn[j]] - 1;
300 #endif
301           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
302           else {
303             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
304           }
305         }
306       }
307     } else {
308       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
309     }
310   }
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
316 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
317 {
318   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
319   int         ierr,nstash,reallocs;
320   InsertMode  addv;
321 
322   PetscFunctionBegin;
323   if (aij->donotstash) {
324     PetscFunctionReturn(0);
325   }
326 
327   /* make sure all processors are either in INSERTMODE or ADDMODE */
328   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
329   if (addv == (ADD_VALUES|INSERT_VALUES)) {
330     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
331   }
332   mat->insertmode = addv; /* in case this processor had no cache */
333 
334   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
335   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
336   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
337   PetscFunctionReturn(0);
338 }
339 
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
343 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
344 {
345   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
346   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
347   int         i,j,rstart,ncols,n,ierr,flg;
348   int         *row,*col,other_disassembled;
349   PetscScalar *val;
350   InsertMode  addv = mat->insertmode;
351 
352   PetscFunctionBegin;
353   if (!aij->donotstash) {
354     while (1) {
355       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
356       if (!flg) break;
357 
358       for (i=0; i<n;) {
359         /* Now identify the consecutive vals belonging to the same row */
360         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
361         if (j < n) ncols = j-i;
362         else       ncols = n-i;
363         /* Now assemble all these values with a single function call */
364         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
365         i = j;
366       }
367     }
368     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
369   }
370 
371   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
372   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
373 
374   /* determine if any processor has disassembled, if so we must
375      also disassemble ourselfs, in order that we may reassemble. */
376   /*
377      if nonzero structure of submatrix B cannot change then we know that
378      no processor disassembled thus we can skip this stuff
379   */
380   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
381     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
382     if (mat->was_assembled && !other_disassembled) {
383       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
384     }
385   }
386 
387   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
388     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
389   }
390   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
391   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
392 
393   if (aij->rowvalues) {
394     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
395     aij->rowvalues = 0;
396   }
397 
398   /* used by MatAXPY() */
399   a->xtoy = 0; b->xtoy = 0;
400   a->XtoY = 0; b->XtoY = 0;
401 
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
407 int MatZeroEntries_MPIAIJ(Mat A)
408 {
409   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
410   int        ierr;
411 
412   PetscFunctionBegin;
413   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
414   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroRows_MPIAIJ"
420 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
421 {
422   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
423   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
424   int            *nprocs,j,idx,nsends,row;
425   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
426   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
427   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
428   MPI_Comm       comm = A->comm;
429   MPI_Request    *send_waits,*recv_waits;
430   MPI_Status     recv_status,*send_status;
431   IS             istmp;
432   PetscTruth     found;
433 
434   PetscFunctionBegin;
435   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
436   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
437 
438   /*  first count number of contributors to each processor */
439   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
440   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
441   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
442   for (i=0; i<N; i++) {
443     idx = rows[i];
444     found = PETSC_FALSE;
445     for (j=0; j<size; j++) {
446       if (idx >= owners[j] && idx < owners[j+1]) {
447         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
448       }
449     }
450     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
451   }
452   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
453 
454   /* inform other processors of number of messages and max length*/
455   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
456 
457   /* post receives:   */
458   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
459   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
460   for (i=0; i<nrecvs; i++) {
461     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
462   }
463 
464   /* do sends:
465       1) starts[i] gives the starting index in svalues for stuff going to
466          the ith processor
467   */
468   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
469   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
470   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
471   starts[0] = 0;
472   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
473   for (i=0; i<N; i++) {
474     svalues[starts[owner[i]]++] = rows[i];
475   }
476   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
477 
478   starts[0] = 0;
479   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
480   count = 0;
481   for (i=0; i<size; i++) {
482     if (nprocs[2*i+1]) {
483       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
484     }
485   }
486   ierr = PetscFree(starts);CHKERRQ(ierr);
487 
488   base = owners[rank];
489 
490   /*  wait on receives */
491   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
492   source = lens + nrecvs;
493   count  = nrecvs; slen = 0;
494   while (count) {
495     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
496     /* unpack receives into our local space */
497     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
498     source[imdex]  = recv_status.MPI_SOURCE;
499     lens[imdex]    = n;
500     slen          += n;
501     count--;
502   }
503   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
504 
505   /* move the data into the send scatter */
506   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
507   count = 0;
508   for (i=0; i<nrecvs; i++) {
509     values = rvalues + i*nmax;
510     for (j=0; j<lens[i]; j++) {
511       lrows[count++] = values[j] - base;
512     }
513   }
514   ierr = PetscFree(rvalues);CHKERRQ(ierr);
515   ierr = PetscFree(lens);CHKERRQ(ierr);
516   ierr = PetscFree(owner);CHKERRQ(ierr);
517   ierr = PetscFree(nprocs);CHKERRQ(ierr);
518 
519   /* actually zap the local rows */
520   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
521   PetscLogObjectParent(A,istmp);
522 
523   /*
524         Zero the required rows. If the "diagonal block" of the matrix
525      is square and the user wishes to set the diagonal we use seperate
526      code so that MatSetValues() is not called for each diagonal allocating
527      new memory, thus calling lots of mallocs and slowing things down.
528 
529        Contributed by: Mathew Knepley
530   */
531   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
532   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
533   if (diag && (l->A->M == l->A->N)) {
534     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
535   } else if (diag) {
536     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
537     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
538       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
539 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
540     }
541     for (i = 0; i < slen; i++) {
542       row  = lrows[i] + rstart;
543       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
544     }
545     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547   } else {
548     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
549   }
550   ierr = ISDestroy(istmp);CHKERRQ(ierr);
551   ierr = PetscFree(lrows);CHKERRQ(ierr);
552 
553   /* wait on sends */
554   if (nsends) {
555     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
556     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
557     ierr = PetscFree(send_status);CHKERRQ(ierr);
558   }
559   ierr = PetscFree(send_waits);CHKERRQ(ierr);
560   ierr = PetscFree(svalues);CHKERRQ(ierr);
561 
562   PetscFunctionReturn(0);
563 }
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatMult_MPIAIJ"
567 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
568 {
569   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
570   int        ierr,nt;
571 
572   PetscFunctionBegin;
573   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
574   if (nt != A->n) {
575     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
576   }
577   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
578   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
579   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
580   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
581   PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatMultAdd_MPIAIJ"
586 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
587 {
588   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
589   int        ierr;
590 
591   PetscFunctionBegin;
592   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
594   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
595   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
601 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
602 {
603   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
604   int        ierr;
605 
606   PetscFunctionBegin;
607   /* do nondiagonal part */
608   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
609   /* send it on its way */
610   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
611   /* do local part */
612   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
613   /* receive remote parts: note this assumes the values are not actually */
614   /* inserted in yy until the next line, which is true for my implementation*/
615   /* but is not perhaps always true. */
616   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 EXTERN_C_BEGIN
621 #undef __FUNCT__
622 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
623 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
624 {
625   MPI_Comm comm;
626   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
627   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
628   IS         Me,Notme;
629   int        M,N,first,last,*notme,ntids,i, ierr;
630 
631   PetscFunctionBegin;
632 
633   /* Easy test: symmetric diagonal block */
634   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
635   ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
636   if (!*f) PetscFunctionReturn(0);
637   ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr);
638   ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr);
639   if (ntids==1) PetscFunctionReturn(0);
640 
641   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
642   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
643   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
644   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
645   for (i=0; i<first; i++) notme[i] = i;
646   for (i=last; i<M; i++) notme[i-last+first] = i;
647   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
648   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
649   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
650   Aoff = Aoffs[0];
651   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
652   Boff = Boffs[0];
653   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
654   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
655   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
656   ierr = ISDestroy(Me); CHKERRQ(ierr);
657   ierr = ISDestroy(Notme); CHKERRQ(ierr);
658 
659   PetscFunctionReturn(0);
660 }
661 EXTERN_C_END
662 
663 #undef __FUNCT__
664 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
665 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
666 {
667   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
668   int        ierr;
669 
670   PetscFunctionBegin;
671   /* do nondiagonal part */
672   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
673   /* send it on its way */
674   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
675   /* do local part */
676   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
677   /* receive remote parts: note this assumes the values are not actually */
678   /* inserted in yy until the next line, which is true for my implementation*/
679   /* but is not perhaps always true. */
680   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 /*
685   This only works correctly for square matrices where the subblock A->A is the
686    diagonal block
687 */
688 #undef __FUNCT__
689 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
690 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
691 {
692   int        ierr;
693   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
694 
695   PetscFunctionBegin;
696   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
697   if (a->rstart != a->cstart || a->rend != a->cend) {
698     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
699   }
700   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatScale_MPIAIJ"
706 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
707 {
708   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
709   int        ierr;
710 
711   PetscFunctionBegin;
712   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
713   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
714   PetscFunctionReturn(0);
715 }
716 
717 #undef __FUNCT__
718 #define __FUNCT__ "MatDestroy_MPIAIJ"
719 int MatDestroy_MPIAIJ(Mat mat)
720 {
721   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
722   int        ierr;
723 
724   PetscFunctionBegin;
725 #if defined(PETSC_USE_LOG)
726   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
727 #endif
728   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
729   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
730   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
731   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
732 #if defined (PETSC_USE_CTABLE)
733   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
734 #else
735   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
736 #endif
737   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
738   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
739   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
740   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
741   ierr = PetscFree(aij);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatView_MPIAIJ_Binary"
747 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
748 {
749   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
750   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
751   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
752   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
753   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
754   PetscScalar       *column_values;
755 
756   PetscFunctionBegin;
757   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
758   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
759   nz   = A->nz + B->nz;
760   if (rank == 0) {
761     header[0] = MAT_FILE_COOKIE;
762     header[1] = mat->M;
763     header[2] = mat->N;
764     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
765     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
766     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
767     /* get largest number of rows any processor has */
768     rlen = mat->m;
769     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
770     for (i=1; i<size; i++) {
771       rlen = PetscMax(rlen,range[i+1] - range[i]);
772     }
773   } else {
774     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
775     rlen = mat->m;
776   }
777 
778   /* load up the local row counts */
779   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
780   for (i=0; i<mat->m; i++) {
781     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
782   }
783 
784   /* store the row lengths to the file */
785   if (rank == 0) {
786     MPI_Status status;
787     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
788     for (i=1; i<size; i++) {
789       rlen = range[i+1] - range[i];
790       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
791       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
792     }
793   } else {
794     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
795   }
796   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
797 
798   /* load up the local column indices */
799   nzmax = nz; /* )th processor needs space a largest processor needs */
800   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
801   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
802   cnt  = 0;
803   for (i=0; i<mat->m; i++) {
804     for (j=B->i[i]; j<B->i[i+1]; j++) {
805       if ( (col = garray[B->j[j]]) > cstart) break;
806       column_indices[cnt++] = col;
807     }
808     for (k=A->i[i]; k<A->i[i+1]; k++) {
809       column_indices[cnt++] = A->j[k] + cstart;
810     }
811     for (; j<B->i[i+1]; j++) {
812       column_indices[cnt++] = garray[B->j[j]];
813     }
814   }
815   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
816 
817   /* store the column indices to the file */
818   if (rank == 0) {
819     MPI_Status status;
820     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
821     for (i=1; i<size; i++) {
822       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
824       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
825       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
826     }
827   } else {
828     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
829     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
830   }
831   ierr = PetscFree(column_indices);CHKERRQ(ierr);
832 
833   /* load up the local column values */
834   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
835   cnt  = 0;
836   for (i=0; i<mat->m; i++) {
837     for (j=B->i[i]; j<B->i[i+1]; j++) {
838       if ( garray[B->j[j]] > cstart) break;
839       column_values[cnt++] = B->a[j];
840     }
841     for (k=A->i[i]; k<A->i[i+1]; k++) {
842       column_values[cnt++] = A->a[k];
843     }
844     for (; j<B->i[i+1]; j++) {
845       column_values[cnt++] = B->a[j];
846     }
847   }
848   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
849 
850   /* store the column values to the file */
851   if (rank == 0) {
852     MPI_Status status;
853     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
854     for (i=1; i<size; i++) {
855       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
857       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
858       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
859     }
860   } else {
861     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
862     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
863   }
864   ierr = PetscFree(column_values);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
870 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
871 {
872   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
873   int               ierr,rank = aij->rank,size = aij->size;
874   PetscTruth        isdraw,isascii,flg,isbinary;
875   PetscViewer       sviewer;
876   PetscViewerFormat format;
877 
878   PetscFunctionBegin;
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
881   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
882   if (isascii) {
883     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
884     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885       MatInfo info;
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
889       if (flg) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
891 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
894 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscFunctionReturn(0);
905     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
906       PetscFunctionReturn(0);
907     }
908   } else if (isbinary) {
909     if (size == 1) {
910       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
911       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
912     } else {
913       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
914     }
915     PetscFunctionReturn(0);
916   } else if (isdraw) {
917     PetscDraw  draw;
918     PetscTruth isnull;
919     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
920     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
921   }
922 
923   if (size == 1) {
924     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
925     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
926   } else {
927     /* assemble the entire matrix onto first processor. */
928     Mat         A;
929     Mat_SeqAIJ *Aloc;
930     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
931     PetscScalar *a;
932 
933     if (!rank) {
934       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
935     } else {
936       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
937     }
938     PetscLogObjectParent(mat,A);
939 
940     /* copy over the A part */
941     Aloc = (Mat_SeqAIJ*)aij->A->data;
942     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943     row = aij->rstart;
944     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
945     for (i=0; i<m; i++) {
946       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
947       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
948     }
949     aj = Aloc->j;
950     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
951 
952     /* copy over the B part */
953     Aloc = (Mat_SeqAIJ*)aij->B->data;
954     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955     row  = aij->rstart;
956     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
957     ct   = cols;
958     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
959     for (i=0; i<m; i++) {
960       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
961       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
962     }
963     ierr = PetscFree(ct);CHKERRQ(ierr);
964     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
965     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
966     /*
967        Everyone has to call to draw the matrix since the graphics waits are
968        synchronized across all processors that share the PetscDraw object
969     */
970     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
971     if (!rank) {
972       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
973       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
974     }
975     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
976     ierr = MatDestroy(A);CHKERRQ(ierr);
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatView_MPIAIJ"
983 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
984 {
985   int        ierr;
986   PetscTruth isascii,isdraw,issocket,isbinary;
987 
988   PetscFunctionBegin;
989   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
990   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
991   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
992   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
993   if (isascii || isdraw || isbinary || issocket) {
994     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
995   } else {
996     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatRelax_MPIAIJ"
1005 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1006 {
1007   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1008   int          ierr;
1009   Vec          bb1;
1010   PetscScalar  mone=-1.0;
1011 
1012   PetscFunctionBegin;
1013   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1014 
1015   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1016 
1017   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1018     if (flag & SOR_ZERO_INITIAL_GUESS) {
1019       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1020       its--;
1021     }
1022 
1023     while (its--) {
1024       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1025       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1026 
1027       /* update rhs: bb1 = bb - B*x */
1028       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1029       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1030 
1031       /* local sweep */
1032       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1033       CHKERRQ(ierr);
1034     }
1035   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1036     if (flag & SOR_ZERO_INITIAL_GUESS) {
1037       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1038       its--;
1039     }
1040     while (its--) {
1041       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1042       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1043 
1044       /* update rhs: bb1 = bb - B*x */
1045       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1046       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1047 
1048       /* local sweep */
1049       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1050       CHKERRQ(ierr);
1051     }
1052   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1053     if (flag & SOR_ZERO_INITIAL_GUESS) {
1054       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1055       its--;
1056     }
1057     while (its--) {
1058       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1059       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1060 
1061       /* update rhs: bb1 = bb - B*x */
1062       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1063       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1064 
1065       /* local sweep */
1066       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1067       CHKERRQ(ierr);
1068     }
1069   } else {
1070     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1071   }
1072 
1073   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1079 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1080 {
1081   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1082   Mat        A = mat->A,B = mat->B;
1083   int        ierr;
1084   PetscReal  isend[5],irecv[5];
1085 
1086   PetscFunctionBegin;
1087   info->block_size     = 1.0;
1088   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1089   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1090   isend[3] = info->memory;  isend[4] = info->mallocs;
1091   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1092   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1093   isend[3] += info->memory;  isend[4] += info->mallocs;
1094   if (flag == MAT_LOCAL) {
1095     info->nz_used      = isend[0];
1096     info->nz_allocated = isend[1];
1097     info->nz_unneeded  = isend[2];
1098     info->memory       = isend[3];
1099     info->mallocs      = isend[4];
1100   } else if (flag == MAT_GLOBAL_MAX) {
1101     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1102     info->nz_used      = irecv[0];
1103     info->nz_allocated = irecv[1];
1104     info->nz_unneeded  = irecv[2];
1105     info->memory       = irecv[3];
1106     info->mallocs      = irecv[4];
1107   } else if (flag == MAT_GLOBAL_SUM) {
1108     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1109     info->nz_used      = irecv[0];
1110     info->nz_allocated = irecv[1];
1111     info->nz_unneeded  = irecv[2];
1112     info->memory       = irecv[3];
1113     info->mallocs      = irecv[4];
1114   }
1115   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1116   info->fill_ratio_needed = 0;
1117   info->factor_mallocs    = 0;
1118   info->rows_global       = (double)matin->M;
1119   info->columns_global    = (double)matin->N;
1120   info->rows_local        = (double)matin->m;
1121   info->columns_local     = (double)matin->N;
1122 
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatSetOption_MPIAIJ"
1128 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1129 {
1130   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1131   int        ierr;
1132 
1133   PetscFunctionBegin;
1134   switch (op) {
1135   case MAT_NO_NEW_NONZERO_LOCATIONS:
1136   case MAT_YES_NEW_NONZERO_LOCATIONS:
1137   case MAT_COLUMNS_UNSORTED:
1138   case MAT_COLUMNS_SORTED:
1139   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1140   case MAT_KEEP_ZEROED_ROWS:
1141   case MAT_NEW_NONZERO_LOCATION_ERR:
1142   case MAT_USE_INODES:
1143   case MAT_DO_NOT_USE_INODES:
1144   case MAT_IGNORE_ZERO_ENTRIES:
1145     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1146     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1147     break;
1148   case MAT_ROW_ORIENTED:
1149     a->roworiented = PETSC_TRUE;
1150     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1151     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1152     break;
1153   case MAT_ROWS_SORTED:
1154   case MAT_ROWS_UNSORTED:
1155   case MAT_YES_NEW_DIAGONALS:
1156     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1157     break;
1158   case MAT_COLUMN_ORIENTED:
1159     a->roworiented = PETSC_FALSE;
1160     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1161     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1162     break;
1163   case MAT_IGNORE_OFF_PROC_ENTRIES:
1164     a->donotstash = PETSC_TRUE;
1165     break;
1166   case MAT_NO_NEW_DIAGONALS:
1167     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1168   default:
1169     SETERRQ(PETSC_ERR_SUP,"unknown option");
1170   }
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatGetRow_MPIAIJ"
1176 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1177 {
1178   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1179   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1180   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1181   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1182   int          *cmap,*idx_p;
1183 
1184   PetscFunctionBegin;
1185   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1186   mat->getrowactive = PETSC_TRUE;
1187 
1188   if (!mat->rowvalues && (idx || v)) {
1189     /*
1190         allocate enough space to hold information from the longest row.
1191     */
1192     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1193     int     max = 1,tmp;
1194     for (i=0; i<matin->m; i++) {
1195       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1196       if (max < tmp) { max = tmp; }
1197     }
1198     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1199     mat->rowindices = (int*)(mat->rowvalues + max);
1200   }
1201 
1202   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1203   lrow = row - rstart;
1204 
1205   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1206   if (!v)   {pvA = 0; pvB = 0;}
1207   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1208   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1209   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1210   nztot = nzA + nzB;
1211 
1212   cmap  = mat->garray;
1213   if (v  || idx) {
1214     if (nztot) {
1215       /* Sort by increasing column numbers, assuming A and B already sorted */
1216       int imark = -1;
1217       if (v) {
1218         *v = v_p = mat->rowvalues;
1219         for (i=0; i<nzB; i++) {
1220           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1221           else break;
1222         }
1223         imark = i;
1224         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1225         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1226       }
1227       if (idx) {
1228         *idx = idx_p = mat->rowindices;
1229         if (imark > -1) {
1230           for (i=0; i<imark; i++) {
1231             idx_p[i] = cmap[cworkB[i]];
1232           }
1233         } else {
1234           for (i=0; i<nzB; i++) {
1235             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1236             else break;
1237           }
1238           imark = i;
1239         }
1240         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1241         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1242       }
1243     } else {
1244       if (idx) *idx = 0;
1245       if (v)   *v   = 0;
1246     }
1247   }
1248   *nz = nztot;
1249   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1250   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1256 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1257 {
1258   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1259 
1260   PetscFunctionBegin;
1261   if (aij->getrowactive == PETSC_FALSE) {
1262     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1263   }
1264   aij->getrowactive = PETSC_FALSE;
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatNorm_MPIAIJ"
1270 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1271 {
1272   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1273   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1274   int          ierr,i,j,cstart = aij->cstart;
1275   PetscReal    sum = 0.0;
1276   PetscScalar  *v;
1277 
1278   PetscFunctionBegin;
1279   if (aij->size == 1) {
1280     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1281   } else {
1282     if (type == NORM_FROBENIUS) {
1283       v = amat->a;
1284       for (i=0; i<amat->nz; i++) {
1285 #if defined(PETSC_USE_COMPLEX)
1286         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1287 #else
1288         sum += (*v)*(*v); v++;
1289 #endif
1290       }
1291       v = bmat->a;
1292       for (i=0; i<bmat->nz; i++) {
1293 #if defined(PETSC_USE_COMPLEX)
1294         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1295 #else
1296         sum += (*v)*(*v); v++;
1297 #endif
1298       }
1299       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1300       *norm = sqrt(*norm);
1301     } else if (type == NORM_1) { /* max column norm */
1302       PetscReal *tmp,*tmp2;
1303       int    *jj,*garray = aij->garray;
1304       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1305       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1306       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1307       *norm = 0.0;
1308       v = amat->a; jj = amat->j;
1309       for (j=0; j<amat->nz; j++) {
1310         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1311       }
1312       v = bmat->a; jj = bmat->j;
1313       for (j=0; j<bmat->nz; j++) {
1314         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1315       }
1316       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1317       for (j=0; j<mat->N; j++) {
1318         if (tmp2[j] > *norm) *norm = tmp2[j];
1319       }
1320       ierr = PetscFree(tmp);CHKERRQ(ierr);
1321       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1322     } else if (type == NORM_INFINITY) { /* max row norm */
1323       PetscReal ntemp = 0.0;
1324       for (j=0; j<aij->A->m; j++) {
1325         v = amat->a + amat->i[j];
1326         sum = 0.0;
1327         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1328           sum += PetscAbsScalar(*v); v++;
1329         }
1330         v = bmat->a + bmat->i[j];
1331         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1332           sum += PetscAbsScalar(*v); v++;
1333         }
1334         if (sum > ntemp) ntemp = sum;
1335       }
1336       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1337     } else {
1338       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1339     }
1340   }
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatTranspose_MPIAIJ"
1346 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1347 {
1348   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1349   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1350   int          ierr;
1351   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1352   Mat          B;
1353   PetscScalar  *array;
1354 
1355   PetscFunctionBegin;
1356   if (!matout && M != N) {
1357     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1358   }
1359 
1360   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1361 
1362   /* copy over the A part */
1363   Aloc = (Mat_SeqAIJ*)a->A->data;
1364   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1365   row = a->rstart;
1366   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1367   for (i=0; i<m; i++) {
1368     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1369     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1370   }
1371   aj = Aloc->j;
1372   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1373 
1374   /* copy over the B part */
1375   Aloc = (Mat_SeqAIJ*)a->B->data;
1376   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1377   row  = a->rstart;
1378   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1379   ct   = cols;
1380   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1381   for (i=0; i<m; i++) {
1382     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1383     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1384   }
1385   ierr = PetscFree(ct);CHKERRQ(ierr);
1386   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1387   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1388   if (matout) {
1389     *matout = B;
1390   } else {
1391     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1392   }
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1398 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1399 {
1400   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1401   Mat        a = aij->A,b = aij->B;
1402   int        ierr,s1,s2,s3;
1403 
1404   PetscFunctionBegin;
1405   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1406   if (rr) {
1407     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1408     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1409     /* Overlap communication with computation. */
1410     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1411   }
1412   if (ll) {
1413     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1414     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1415     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1416   }
1417   /* scale  the diagonal block */
1418   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1419 
1420   if (rr) {
1421     /* Do a scatter end and then right scale the off-diagonal block */
1422     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1423     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1424   }
1425 
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1432 int MatPrintHelp_MPIAIJ(Mat A)
1433 {
1434   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1435   int        ierr;
1436 
1437   PetscFunctionBegin;
1438   if (!a->rank) {
1439     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1440   }
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1446 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1447 {
1448   PetscFunctionBegin;
1449   *bs = 1;
1450   PetscFunctionReturn(0);
1451 }
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1454 int MatSetUnfactored_MPIAIJ(Mat A)
1455 {
1456   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1457   int        ierr;
1458 
1459   PetscFunctionBegin;
1460   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatEqual_MPIAIJ"
1466 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1467 {
1468   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1469   Mat        a,b,c,d;
1470   PetscTruth flg;
1471   int        ierr;
1472 
1473   PetscFunctionBegin;
1474   a = matA->A; b = matA->B;
1475   c = matB->A; d = matB->B;
1476 
1477   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1478   if (flg == PETSC_TRUE) {
1479     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1480   }
1481   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatCopy_MPIAIJ"
1487 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1488 {
1489   int        ierr;
1490   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1491   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1492 
1493   PetscFunctionBegin;
1494   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1495   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1496     /* because of the column compression in the off-processor part of the matrix a->B,
1497        the number of columns in a->B and b->B may be different, hence we cannot call
1498        the MatCopy() directly on the two parts. If need be, we can provide a more
1499        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1500        then copying the submatrices */
1501     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1502   } else {
1503     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1504     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1505   }
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1511 int MatSetUpPreallocation_MPIAIJ(Mat A)
1512 {
1513   int        ierr;
1514 
1515   PetscFunctionBegin;
1516   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #include "petscblaslapack.h"
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatAXPY_MPIAIJ"
1523 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1524 {
1525   int        ierr,one=1,i;
1526   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1527   Mat_SeqAIJ *x,*y;
1528 
1529   PetscFunctionBegin;
1530   if (str == SAME_NONZERO_PATTERN) {
1531     x = (Mat_SeqAIJ *)xx->A->data;
1532     y = (Mat_SeqAIJ *)yy->A->data;
1533     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1534     x = (Mat_SeqAIJ *)xx->B->data;
1535     y = (Mat_SeqAIJ *)yy->B->data;
1536     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1537   } else if (str == SUBSET_NONZERO_PATTERN) {
1538     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1539 
1540     x = (Mat_SeqAIJ *)xx->B->data;
1541     y = (Mat_SeqAIJ *)yy->B->data;
1542     if (y->xtoy && y->XtoY != xx->B) {
1543       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1544       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1545     }
1546     if (!y->xtoy) { /* get xtoy */
1547       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1548       y->XtoY = xx->B;
1549     }
1550     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1551   } else {
1552     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /* -------------------------------------------------------------------*/
1558 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1559        MatGetRow_MPIAIJ,
1560        MatRestoreRow_MPIAIJ,
1561        MatMult_MPIAIJ,
1562 /* 4*/ MatMultAdd_MPIAIJ,
1563        MatMultTranspose_MPIAIJ,
1564        MatMultTransposeAdd_MPIAIJ,
1565        0,
1566        0,
1567        0,
1568 /*10*/ 0,
1569        0,
1570        0,
1571        MatRelax_MPIAIJ,
1572        MatTranspose_MPIAIJ,
1573 /*15*/ MatGetInfo_MPIAIJ,
1574        MatEqual_MPIAIJ,
1575        MatGetDiagonal_MPIAIJ,
1576        MatDiagonalScale_MPIAIJ,
1577        MatNorm_MPIAIJ,
1578 /*20*/ MatAssemblyBegin_MPIAIJ,
1579        MatAssemblyEnd_MPIAIJ,
1580        0,
1581        MatSetOption_MPIAIJ,
1582        MatZeroEntries_MPIAIJ,
1583 /*25*/ MatZeroRows_MPIAIJ,
1584 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1585        MatLUFactorSymbolic_MPIAIJ_TFS,
1586 #else
1587        0,
1588 #endif
1589        0,
1590        0,
1591        0,
1592 /*30*/ MatSetUpPreallocation_MPIAIJ,
1593        0,
1594        0,
1595        0,
1596        0,
1597 /*35*/ MatDuplicate_MPIAIJ,
1598        0,
1599        0,
1600        0,
1601        0,
1602 /*40*/ MatAXPY_MPIAIJ,
1603        MatGetSubMatrices_MPIAIJ,
1604        MatIncreaseOverlap_MPIAIJ,
1605        MatGetValues_MPIAIJ,
1606        MatCopy_MPIAIJ,
1607 /*45*/ MatPrintHelp_MPIAIJ,
1608        MatScale_MPIAIJ,
1609        0,
1610        0,
1611        0,
1612 /*50*/ MatGetBlockSize_MPIAIJ,
1613        0,
1614        0,
1615        0,
1616        0,
1617 /*55*/ MatFDColoringCreate_MPIAIJ,
1618        0,
1619        MatSetUnfactored_MPIAIJ,
1620        0,
1621        0,
1622 /*60*/ MatGetSubMatrix_MPIAIJ,
1623        MatDestroy_MPIAIJ,
1624        MatView_MPIAIJ,
1625        MatGetPetscMaps_Petsc,
1626        0,
1627 /*65*/ 0,
1628        0,
1629        0,
1630        0,
1631        0,
1632 /*70*/ 0,
1633        0,
1634        MatSetColoring_MPIAIJ,
1635        MatSetValuesAdic_MPIAIJ,
1636        MatSetValuesAdifor_MPIAIJ,
1637 /*75*/ 0,
1638        0,
1639        0,
1640        0,
1641        0,
1642 /*80*/ 0,
1643        0,
1644        0,
1645        0,
1646        0,
1647 /*85*/ MatLoad_MPIAIJ};
1648 
1649 /* ----------------------------------------------------------------------------------------*/
1650 
1651 EXTERN_C_BEGIN
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1654 int MatStoreValues_MPIAIJ(Mat mat)
1655 {
1656   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1657   int        ierr;
1658 
1659   PetscFunctionBegin;
1660   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1661   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1662   PetscFunctionReturn(0);
1663 }
1664 EXTERN_C_END
1665 
1666 EXTERN_C_BEGIN
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1669 int MatRetrieveValues_MPIAIJ(Mat mat)
1670 {
1671   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1672   int        ierr;
1673 
1674   PetscFunctionBegin;
1675   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1676   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 EXTERN_C_END
1680 
1681 #include "petscpc.h"
1682 EXTERN_C_BEGIN
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1685 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1686 {
1687   Mat_MPIAIJ   *b;
1688   int          ierr,i;
1689 
1690   PetscFunctionBegin;
1691   B->preallocated = PETSC_TRUE;
1692   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1693   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1694   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1695   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1696   if (d_nnz) {
1697     for (i=0; i<B->m; i++) {
1698       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1699     }
1700   }
1701   if (o_nnz) {
1702     for (i=0; i<B->m; i++) {
1703       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1704     }
1705   }
1706   b = (Mat_MPIAIJ*)B->data;
1707 
1708   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1709   PetscLogObjectParent(B,b->A);
1710   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1711   PetscLogObjectParent(B,b->B);
1712 
1713   PetscFunctionReturn(0);
1714 }
1715 EXTERN_C_END
1716 
1717 /*MC
1718    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1719 
1720    Options Database Keys:
1721 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1722 
1723   Level: beginner
1724 
1725 .seealso: MatCreateMPIAIJ
1726 M*/
1727 
1728 EXTERN_C_BEGIN
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatCreate_MPIAIJ"
1731 int MatCreate_MPIAIJ(Mat B)
1732 {
1733   Mat_MPIAIJ *b;
1734   int        ierr,i,size;
1735 
1736   PetscFunctionBegin;
1737   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1738 
1739   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1740   B->data         = (void*)b;
1741   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1742   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1743   B->factor       = 0;
1744   B->assembled    = PETSC_FALSE;
1745   B->mapping      = 0;
1746 
1747   B->insertmode      = NOT_SET_VALUES;
1748   b->size            = size;
1749   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1750 
1751   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1752   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1753 
1754   /* the information in the maps duplicates the information computed below, eventually
1755      we should remove the duplicate information that is not contained in the maps */
1756   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1757   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1758 
1759   /* build local table of row and column ownerships */
1760   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1761   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1762   b->cowners = b->rowners + b->size + 2;
1763   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1764   b->rowners[0] = 0;
1765   for (i=2; i<=b->size; i++) {
1766     b->rowners[i] += b->rowners[i-1];
1767   }
1768   b->rstart = b->rowners[b->rank];
1769   b->rend   = b->rowners[b->rank+1];
1770   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1771   b->cowners[0] = 0;
1772   for (i=2; i<=b->size; i++) {
1773     b->cowners[i] += b->cowners[i-1];
1774   }
1775   b->cstart = b->cowners[b->rank];
1776   b->cend   = b->cowners[b->rank+1];
1777 
1778   /* build cache for off array entries formed */
1779   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1780   b->donotstash  = PETSC_FALSE;
1781   b->colmap      = 0;
1782   b->garray      = 0;
1783   b->roworiented = PETSC_TRUE;
1784 
1785   /* stuff used for matrix vector multiply */
1786   b->lvec      = PETSC_NULL;
1787   b->Mvctx     = PETSC_NULL;
1788 
1789   /* stuff for MatGetRow() */
1790   b->rowindices   = 0;
1791   b->rowvalues    = 0;
1792   b->getrowactive = PETSC_FALSE;
1793 
1794   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1795                                      "MatStoreValues_MPIAIJ",
1796                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1797   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1798                                      "MatRetrieveValues_MPIAIJ",
1799                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1800   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1801 				     "MatGetDiagonalBlock_MPIAIJ",
1802                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1803   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1804 				     "MatIsSymmetric_MPIAIJ",
1805 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1807 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1808 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1810 				     "MatDiagonalScaleLocal_MPIAIJ",
1811 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 EXTERN_C_END
1815 
1816 /*MC
1817    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1818 
1819    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1820    and MATMPIAIJ otherwise.
1821 
1822    Options Database Keys:
1823 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1824 
1825   Level: beginner
1826 
1827 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1828 M*/
1829 
1830 EXTERN_C_BEGIN
1831 #undef __FUNCT__
1832 #define __FUNCT__ "MatCreate_AIJ"
1833 int MatCreate_AIJ(Mat A) {
1834   int ierr,size;
1835 
1836   PetscFunctionBegin;
1837   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1838   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1839   if (size == 1) {
1840     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1841   } else {
1842     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1843   }
1844   PetscFunctionReturn(0);
1845 }
1846 EXTERN_C_END
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1850 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1851 {
1852   Mat        mat;
1853   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1854   int        ierr;
1855 
1856   PetscFunctionBegin;
1857   *newmat       = 0;
1858   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1859   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1860   a    = (Mat_MPIAIJ*)mat->data;
1861   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1862   mat->factor       = matin->factor;
1863   mat->assembled    = PETSC_TRUE;
1864   mat->insertmode   = NOT_SET_VALUES;
1865   mat->preallocated = PETSC_TRUE;
1866 
1867   a->rstart       = oldmat->rstart;
1868   a->rend         = oldmat->rend;
1869   a->cstart       = oldmat->cstart;
1870   a->cend         = oldmat->cend;
1871   a->size         = oldmat->size;
1872   a->rank         = oldmat->rank;
1873   a->donotstash   = oldmat->donotstash;
1874   a->roworiented  = oldmat->roworiented;
1875   a->rowindices   = 0;
1876   a->rowvalues    = 0;
1877   a->getrowactive = PETSC_FALSE;
1878 
1879   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1880   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1881   if (oldmat->colmap) {
1882 #if defined (PETSC_USE_CTABLE)
1883     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1884 #else
1885     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1886     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1887     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1888 #endif
1889   } else a->colmap = 0;
1890   if (oldmat->garray) {
1891     int len;
1892     len  = oldmat->B->n;
1893     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1894     PetscLogObjectMemory(mat,len*sizeof(int));
1895     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1896   } else a->garray = 0;
1897 
1898   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1899   PetscLogObjectParent(mat,a->lvec);
1900   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1901   PetscLogObjectParent(mat,a->Mvctx);
1902   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1903   PetscLogObjectParent(mat,a->A);
1904   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1905   PetscLogObjectParent(mat,a->B);
1906   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1907   *newmat = mat;
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #include "petscsys.h"
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatLoad_MPIAIJ"
1915 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1916 {
1917   Mat          A;
1918   PetscScalar  *vals,*svals;
1919   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1920   MPI_Status   status;
1921   int          i,nz,ierr,j,rstart,rend,fd;
1922   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1923   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1924   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1925 
1926   PetscFunctionBegin;
1927   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1928   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1929   if (!rank) {
1930     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1931     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1932     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1933     if (header[3] < 0) {
1934       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1935     }
1936   }
1937 
1938   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1939   M = header[1]; N = header[2];
1940   /* determine ownership of all rows */
1941   m = M/size + ((M % size) > rank);
1942   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1943   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1944   rowners[0] = 0;
1945   for (i=2; i<=size; i++) {
1946     rowners[i] += rowners[i-1];
1947   }
1948   rstart = rowners[rank];
1949   rend   = rowners[rank+1];
1950 
1951   /* distribute row lengths to all processors */
1952   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1953   offlens = ourlens + (rend-rstart);
1954   if (!rank) {
1955     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1956     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1957     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1958     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1959     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1960     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1961   } else {
1962     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1963   }
1964 
1965   if (!rank) {
1966     /* calculate the number of nonzeros on each processor */
1967     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1968     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1969     for (i=0; i<size; i++) {
1970       for (j=rowners[i]; j< rowners[i+1]; j++) {
1971         procsnz[i] += rowlengths[j];
1972       }
1973     }
1974     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1975 
1976     /* determine max buffer needed and allocate it */
1977     maxnz = 0;
1978     for (i=0; i<size; i++) {
1979       maxnz = PetscMax(maxnz,procsnz[i]);
1980     }
1981     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1982 
1983     /* read in my part of the matrix column indices  */
1984     nz   = procsnz[0];
1985     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1986     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1987 
1988     /* read in every one elses and ship off */
1989     for (i=1; i<size; i++) {
1990       nz   = procsnz[i];
1991       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1992       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1993     }
1994     ierr = PetscFree(cols);CHKERRQ(ierr);
1995   } else {
1996     /* determine buffer space needed for message */
1997     nz = 0;
1998     for (i=0; i<m; i++) {
1999       nz += ourlens[i];
2000     }
2001     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2002 
2003     /* receive message of column indices*/
2004     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2005     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2006     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2007   }
2008 
2009   /* determine column ownership if matrix is not square */
2010   if (N != M) {
2011     n      = N/size + ((N % size) > rank);
2012     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2013     cstart = cend - n;
2014   } else {
2015     cstart = rstart;
2016     cend   = rend;
2017     n      = cend - cstart;
2018   }
2019 
2020   /* loop over local rows, determining number of off diagonal entries */
2021   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2022   jj = 0;
2023   for (i=0; i<m; i++) {
2024     for (j=0; j<ourlens[i]; j++) {
2025       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2026       jj++;
2027     }
2028   }
2029 
2030   /* create our matrix */
2031   for (i=0; i<m; i++) {
2032     ourlens[i] -= offlens[i];
2033   }
2034   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2035   ierr = MatSetType(A,type);CHKERRQ(ierr);
2036   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2037 
2038   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2039   for (i=0; i<m; i++) {
2040     ourlens[i] += offlens[i];
2041   }
2042 
2043   if (!rank) {
2044     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2045 
2046     /* read in my part of the matrix numerical values  */
2047     nz   = procsnz[0];
2048     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2049 
2050     /* insert into matrix */
2051     jj      = rstart;
2052     smycols = mycols;
2053     svals   = vals;
2054     for (i=0; i<m; i++) {
2055       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2056       smycols += ourlens[i];
2057       svals   += ourlens[i];
2058       jj++;
2059     }
2060 
2061     /* read in other processors and ship out */
2062     for (i=1; i<size; i++) {
2063       nz   = procsnz[i];
2064       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2065       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2066     }
2067     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2068   } else {
2069     /* receive numeric values */
2070     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2071 
2072     /* receive message of values*/
2073     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2074     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2075     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2076 
2077     /* insert into matrix */
2078     jj      = rstart;
2079     smycols = mycols;
2080     svals   = vals;
2081     for (i=0; i<m; i++) {
2082       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2083       smycols += ourlens[i];
2084       svals   += ourlens[i];
2085       jj++;
2086     }
2087   }
2088   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2089   ierr = PetscFree(vals);CHKERRQ(ierr);
2090   ierr = PetscFree(mycols);CHKERRQ(ierr);
2091   ierr = PetscFree(rowners);CHKERRQ(ierr);
2092 
2093   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2094   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2095   *newmat = A;
2096   PetscFunctionReturn(0);
2097 }
2098 
2099 #undef __FUNCT__
2100 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2101 /*
2102     Not great since it makes two copies of the submatrix, first an SeqAIJ
2103   in local and then by concatenating the local matrices the end result.
2104   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2105 */
2106 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2107 {
2108   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2109   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2110   Mat          *local,M,Mreuse;
2111   PetscScalar  *vwork,*aa;
2112   MPI_Comm     comm = mat->comm;
2113   Mat_SeqAIJ   *aij;
2114 
2115 
2116   PetscFunctionBegin;
2117   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2118   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2119 
2120   if (call ==  MAT_REUSE_MATRIX) {
2121     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2122     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2123     local = &Mreuse;
2124     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2125   } else {
2126     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2127     Mreuse = *local;
2128     ierr   = PetscFree(local);CHKERRQ(ierr);
2129   }
2130 
2131   /*
2132       m - number of local rows
2133       n - number of columns (same on all processors)
2134       rstart - first row in new global matrix generated
2135   */
2136   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2137   if (call == MAT_INITIAL_MATRIX) {
2138     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2139     ii  = aij->i;
2140     jj  = aij->j;
2141 
2142     /*
2143         Determine the number of non-zeros in the diagonal and off-diagonal
2144         portions of the matrix in order to do correct preallocation
2145     */
2146 
2147     /* first get start and end of "diagonal" columns */
2148     if (csize == PETSC_DECIDE) {
2149       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2150       if (mglobal == n) { /* square matrix */
2151 	nlocal = m;
2152       } else {
2153         nlocal = n/size + ((n % size) > rank);
2154       }
2155     } else {
2156       nlocal = csize;
2157     }
2158     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2159     rstart = rend - nlocal;
2160     if (rank == size - 1 && rend != n) {
2161       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2162     }
2163 
2164     /* next, compute all the lengths */
2165     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2166     olens = dlens + m;
2167     for (i=0; i<m; i++) {
2168       jend = ii[i+1] - ii[i];
2169       olen = 0;
2170       dlen = 0;
2171       for (j=0; j<jend; j++) {
2172         if (*jj < rstart || *jj >= rend) olen++;
2173         else dlen++;
2174         jj++;
2175       }
2176       olens[i] = olen;
2177       dlens[i] = dlen;
2178     }
2179     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2180     ierr = PetscFree(dlens);CHKERRQ(ierr);
2181   } else {
2182     int ml,nl;
2183 
2184     M = *newmat;
2185     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2186     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2187     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2188     /*
2189          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2190        rather than the slower MatSetValues().
2191     */
2192     M->was_assembled = PETSC_TRUE;
2193     M->assembled     = PETSC_FALSE;
2194   }
2195   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2196   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2197   ii  = aij->i;
2198   jj  = aij->j;
2199   aa  = aij->a;
2200   for (i=0; i<m; i++) {
2201     row   = rstart + i;
2202     nz    = ii[i+1] - ii[i];
2203     cwork = jj;     jj += nz;
2204     vwork = aa;     aa += nz;
2205     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2206   }
2207 
2208   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2209   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2210   *newmat = M;
2211 
2212   /* save submatrix used in processor for next request */
2213   if (call ==  MAT_INITIAL_MATRIX) {
2214     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2215     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2216   }
2217 
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2223 /*@C
2224    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2225    (the default parallel PETSc format).  For good matrix assembly performance
2226    the user should preallocate the matrix storage by setting the parameters
2227    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2228    performance can be increased by more than a factor of 50.
2229 
2230    Collective on MPI_Comm
2231 
2232    Input Parameters:
2233 +  A - the matrix
2234 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2235            (same value is used for all local rows)
2236 .  d_nnz - array containing the number of nonzeros in the various rows of the
2237            DIAGONAL portion of the local submatrix (possibly different for each row)
2238            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2239            The size of this array is equal to the number of local rows, i.e 'm'.
2240            You must leave room for the diagonal entry even if it is zero.
2241 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2242            submatrix (same value is used for all local rows).
2243 -  o_nnz - array containing the number of nonzeros in the various rows of the
2244            OFF-DIAGONAL portion of the local submatrix (possibly different for
2245            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2246            structure. The size of this array is equal to the number
2247            of local rows, i.e 'm'.
2248 
2249    The AIJ format (also called the Yale sparse matrix format or
2250    compressed row storage), is fully compatible with standard Fortran 77
2251    storage.  That is, the stored row and column indices can begin at
2252    either one (as in Fortran) or zero.  See the users manual for details.
2253 
2254    The user MUST specify either the local or global matrix dimensions
2255    (possibly both).
2256 
2257    The parallel matrix is partitioned such that the first m0 rows belong to
2258    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2259    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2260 
2261    The DIAGONAL portion of the local submatrix of a processor can be defined
2262    as the submatrix which is obtained by extraction the part corresponding
2263    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2264    first row that belongs to the processor, and r2 is the last row belonging
2265    to the this processor. This is a square mxm matrix. The remaining portion
2266    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2267 
2268    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2269 
2270    By default, this format uses inodes (identical nodes) when possible.
2271    We search for consecutive rows with the same nonzero structure, thereby
2272    reusing matrix information to achieve increased efficiency.
2273 
2274    Options Database Keys:
2275 +  -mat_aij_no_inode  - Do not use inodes
2276 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2277 -  -mat_aij_oneindex - Internally use indexing starting at 1
2278         rather than 0.  Note that when calling MatSetValues(),
2279         the user still MUST index entries starting at 0!
2280 
2281    Example usage:
2282 
2283    Consider the following 8x8 matrix with 34 non-zero values, that is
2284    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2285    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2286    as follows:
2287 
2288 .vb
2289             1  2  0  |  0  3  0  |  0  4
2290     Proc0   0  5  6  |  7  0  0  |  8  0
2291             9  0 10  | 11  0  0  | 12  0
2292     -------------------------------------
2293            13  0 14  | 15 16 17  |  0  0
2294     Proc1   0 18  0  | 19 20 21  |  0  0
2295             0  0  0  | 22 23  0  | 24  0
2296     -------------------------------------
2297     Proc2  25 26 27  |  0  0 28  | 29  0
2298            30  0  0  | 31 32 33  |  0 34
2299 .ve
2300 
2301    This can be represented as a collection of submatrices as:
2302 
2303 .vb
2304       A B C
2305       D E F
2306       G H I
2307 .ve
2308 
2309    Where the submatrices A,B,C are owned by proc0, D,E,F are
2310    owned by proc1, G,H,I are owned by proc2.
2311 
2312    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2313    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2314    The 'M','N' parameters are 8,8, and have the same values on all procs.
2315 
2316    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2317    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2318    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2319    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2320    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2321    matrix, ans [DF] as another SeqAIJ matrix.
2322 
2323    When d_nz, o_nz parameters are specified, d_nz storage elements are
2324    allocated for every row of the local diagonal submatrix, and o_nz
2325    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2326    One way to choose d_nz and o_nz is to use the max nonzerors per local
2327    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2328    In this case, the values of d_nz,o_nz are:
2329 .vb
2330      proc0 : dnz = 2, o_nz = 2
2331      proc1 : dnz = 3, o_nz = 2
2332      proc2 : dnz = 1, o_nz = 4
2333 .ve
2334    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2335    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2336    for proc3. i.e we are using 12+15+10=37 storage locations to store
2337    34 values.
2338 
2339    When d_nnz, o_nnz parameters are specified, the storage is specified
2340    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2341    In the above case the values for d_nnz,o_nnz are:
2342 .vb
2343      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2344      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2345      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2346 .ve
2347    Here the space allocated is sum of all the above values i.e 34, and
2348    hence pre-allocation is perfect.
2349 
2350    Level: intermediate
2351 
2352 .keywords: matrix, aij, compressed row, sparse, parallel
2353 
2354 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2355 @*/
2356 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2357 {
2358   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2359 
2360   PetscFunctionBegin;
2361   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2362   if (f) {
2363     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2364   }
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatCreateMPIAIJ"
2370 /*@C
2371    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2372    (the default parallel PETSc format).  For good matrix assembly performance
2373    the user should preallocate the matrix storage by setting the parameters
2374    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2375    performance can be increased by more than a factor of 50.
2376 
2377    Collective on MPI_Comm
2378 
2379    Input Parameters:
2380 +  comm - MPI communicator
2381 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2382            This value should be the same as the local size used in creating the
2383            y vector for the matrix-vector product y = Ax.
2384 .  n - This value should be the same as the local size used in creating the
2385        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2386        calculated if N is given) For square matrices n is almost always m.
2387 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2388 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2389 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2390            (same value is used for all local rows)
2391 .  d_nnz - array containing the number of nonzeros in the various rows of the
2392            DIAGONAL portion of the local submatrix (possibly different for each row)
2393            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2394            The size of this array is equal to the number of local rows, i.e 'm'.
2395            You must leave room for the diagonal entry even if it is zero.
2396 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2397            submatrix (same value is used for all local rows).
2398 -  o_nnz - array containing the number of nonzeros in the various rows of the
2399            OFF-DIAGONAL portion of the local submatrix (possibly different for
2400            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2401            structure. The size of this array is equal to the number
2402            of local rows, i.e 'm'.
2403 
2404    Output Parameter:
2405 .  A - the matrix
2406 
2407    Notes:
2408    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2409    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2410    storage requirements for this matrix.
2411 
2412    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2413    processor than it must be used on all processors that share the object for
2414    that argument.
2415 
2416    The AIJ format (also called the Yale sparse matrix format or
2417    compressed row storage), is fully compatible with standard Fortran 77
2418    storage.  That is, the stored row and column indices can begin at
2419    either one (as in Fortran) or zero.  See the users manual for details.
2420 
2421    The user MUST specify either the local or global matrix dimensions
2422    (possibly both).
2423 
2424    The parallel matrix is partitioned such that the first m0 rows belong to
2425    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2426    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2427 
2428    The DIAGONAL portion of the local submatrix of a processor can be defined
2429    as the submatrix which is obtained by extraction the part corresponding
2430    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2431    first row that belongs to the processor, and r2 is the last row belonging
2432    to the this processor. This is a square mxm matrix. The remaining portion
2433    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2434 
2435    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2436 
2437    When calling this routine with a single process communicator, a matrix of
2438    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2439    type of communicator, use the construction mechanism:
2440      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2441 
2442    By default, this format uses inodes (identical nodes) when possible.
2443    We search for consecutive rows with the same nonzero structure, thereby
2444    reusing matrix information to achieve increased efficiency.
2445 
2446    Options Database Keys:
2447 +  -mat_aij_no_inode  - Do not use inodes
2448 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2449 -  -mat_aij_oneindex - Internally use indexing starting at 1
2450         rather than 0.  Note that when calling MatSetValues(),
2451         the user still MUST index entries starting at 0!
2452 
2453 
2454    Example usage:
2455 
2456    Consider the following 8x8 matrix with 34 non-zero values, that is
2457    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2458    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2459    as follows:
2460 
2461 .vb
2462             1  2  0  |  0  3  0  |  0  4
2463     Proc0   0  5  6  |  7  0  0  |  8  0
2464             9  0 10  | 11  0  0  | 12  0
2465     -------------------------------------
2466            13  0 14  | 15 16 17  |  0  0
2467     Proc1   0 18  0  | 19 20 21  |  0  0
2468             0  0  0  | 22 23  0  | 24  0
2469     -------------------------------------
2470     Proc2  25 26 27  |  0  0 28  | 29  0
2471            30  0  0  | 31 32 33  |  0 34
2472 .ve
2473 
2474    This can be represented as a collection of submatrices as:
2475 
2476 .vb
2477       A B C
2478       D E F
2479       G H I
2480 .ve
2481 
2482    Where the submatrices A,B,C are owned by proc0, D,E,F are
2483    owned by proc1, G,H,I are owned by proc2.
2484 
2485    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2486    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2487    The 'M','N' parameters are 8,8, and have the same values on all procs.
2488 
2489    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2490    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2491    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2492    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2493    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2494    matrix, ans [DF] as another SeqAIJ matrix.
2495 
2496    When d_nz, o_nz parameters are specified, d_nz storage elements are
2497    allocated for every row of the local diagonal submatrix, and o_nz
2498    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2499    One way to choose d_nz and o_nz is to use the max nonzerors per local
2500    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2501    In this case, the values of d_nz,o_nz are:
2502 .vb
2503      proc0 : dnz = 2, o_nz = 2
2504      proc1 : dnz = 3, o_nz = 2
2505      proc2 : dnz = 1, o_nz = 4
2506 .ve
2507    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2508    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2509    for proc3. i.e we are using 12+15+10=37 storage locations to store
2510    34 values.
2511 
2512    When d_nnz, o_nnz parameters are specified, the storage is specified
2513    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2514    In the above case the values for d_nnz,o_nnz are:
2515 .vb
2516      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2517      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2518      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2519 .ve
2520    Here the space allocated is sum of all the above values i.e 34, and
2521    hence pre-allocation is perfect.
2522 
2523    Level: intermediate
2524 
2525 .keywords: matrix, aij, compressed row, sparse, parallel
2526 
2527 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2528 @*/
2529 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2530 {
2531   int ierr,size;
2532 
2533   PetscFunctionBegin;
2534   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2535   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2536   if (size > 1) {
2537     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2538     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2539   } else {
2540     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2541     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2542   }
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2548 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2549 {
2550   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2551   PetscFunctionBegin;
2552   *Ad     = a->A;
2553   *Ao     = a->B;
2554   *colmap = a->garray;
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2560 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2561 {
2562   int        ierr,i;
2563   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2564 
2565   PetscFunctionBegin;
2566   if (coloring->ctype == IS_COLORING_LOCAL) {
2567     ISColoringValue *allcolors,*colors;
2568     ISColoring      ocoloring;
2569 
2570     /* set coloring for diagonal portion */
2571     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2572 
2573     /* set coloring for off-diagonal portion */
2574     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2575     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2576     for (i=0; i<a->B->n; i++) {
2577       colors[i] = allcolors[a->garray[i]];
2578     }
2579     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2580     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2581     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2582     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2583   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2584     ISColoringValue *colors;
2585     int             *larray;
2586     ISColoring      ocoloring;
2587 
2588     /* set coloring for diagonal portion */
2589     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2590     for (i=0; i<a->A->n; i++) {
2591       larray[i] = i + a->cstart;
2592     }
2593     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2594     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2595     for (i=0; i<a->A->n; i++) {
2596       colors[i] = coloring->colors[larray[i]];
2597     }
2598     ierr = PetscFree(larray);CHKERRQ(ierr);
2599     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2600     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2601     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2602 
2603     /* set coloring for off-diagonal portion */
2604     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2605     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2606     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2607     for (i=0; i<a->B->n; i++) {
2608       colors[i] = coloring->colors[larray[i]];
2609     }
2610     ierr = PetscFree(larray);CHKERRQ(ierr);
2611     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2612     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2613     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2614   } else {
2615     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2616   }
2617 
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 #undef __FUNCT__
2622 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2623 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2624 {
2625   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2626   int        ierr;
2627 
2628   PetscFunctionBegin;
2629   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2630   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 #undef __FUNCT__
2635 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2636 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2637 {
2638   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2639   int        ierr;
2640 
2641   PetscFunctionBegin;
2642   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2643   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatMerge"
2649 /*@C
2650       MatMerge - Creates a single large PETSc matrix by concatinating sequential
2651                  matrices from each processor
2652 
2653     Collective on MPI_Comm
2654 
2655    Input Parameters:
2656 +    comm - the communicators the parallel matrix will live on
2657 -    inmat - the input sequential matrices
2658 
2659    Output Parameter:
2660 .    outmat - the parallel matrix generated
2661 
2662     Level: advanced
2663 
2664    Notes: The number of columns of the matrix in EACH of the seperate files
2665       MUST be the same.
2666 
2667 @*/
2668 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2669 {
2670   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2671   PetscScalar *values;
2672   PetscMap    columnmap,rowmap;
2673 
2674   PetscFunctionBegin;
2675 
2676   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2677 
2678   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2679   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2680   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2681   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2682   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2683   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2684 
2685   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2686   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2687   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2688   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2689   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2690 
2691   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2692   for (i=0;i<m;i++) {
2693     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2694     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2695     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2696   }
2697   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2698   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2699 
2700   for (i=0;i<m;i++) {
2701     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2702     I    = i + rstart;
2703     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2704     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2705   }
2706   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2707   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2708   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2709 
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 #undef __FUNCT__
2714 #define __FUNCT__ "MatFileSplit"
2715 int MatFileSplit(Mat A,char *outfile)
2716 {
2717   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2718   PetscViewer out;
2719   char        *name;
2720   Mat         B;
2721   PetscScalar *values;
2722 
2723   PetscFunctionBegin;
2724 
2725   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2726   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2727   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2728   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2729   for (i=0;i<m;i++) {
2730     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2731     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2732     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2733   }
2734   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2735   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2736 
2737   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2738   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2739   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2740   sprintf(name,"%s.%d",outfile,rank);
2741   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2742   ierr = PetscFree(name);
2743   ierr = MatView(B,out);CHKERRQ(ierr);
2744   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2745   ierr = MatDestroy(B);CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748