1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 /*
8   Local utility routine that creates a mapping from the global column
9 number to the local number in the off-diagonal part of the local
10 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
11 a slightly higher hash-table lookup cost; without it, it is not scalable (each
12 processor holds an order-N integer array) but access is fast.
13 */
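
/*
  Illustrative sketch (assumed example values, not part of the matrix data): if the
  off-diagonal block B references the global columns garray[] = {3,7,12}, then the
  non-CTABLE branch below produces

      colmap[3] == 1,  colmap[7] == 2,  colmap[12] == 3,

  i.e. colmap[global column] stores (local column + 1) so that a value of 0 can
  mean "this global column does not appear on this processor".
*/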
14 #undef __FUNCT__
15 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
19   int        n = aij->B->n,i,ierr;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
29   PetscLogObjectMemory(mat,mat->N*sizeof(int));
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 #define CHUNKSIZE   15
37 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
38 { \
39  \
40     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
41     rmax = aimax[row]; nrow = ailen[row];  \
42     col1 = col - shift; \
43      \
44     low = 0; high = nrow; \
45     while (high-low > 5) { \
46       t = (low+high)/2; \
47       if (rp[t] > col) high = t; \
48       else             low  = t; \
49     } \
50       for (_i=low; _i<high; _i++) { \
51         if (rp[_i] > col1) break; \
52         if (rp[_i] == col1) { \
53           if (addv == ADD_VALUES) ap[_i] += value;   \
54           else                  ap[_i] = value; \
55           goto a_noinsert; \
56         } \
57       }  \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (int*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(int));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                  ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (nonew == 1) goto b_noinsert; \
133       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
134       if (nrow >= rmax) { \
135         /* there is no extra room in row, therefore enlarge */ \
136         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
137         PetscScalar *new_a; \
138  \
139         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
140  \
141         /* malloc new storage space */ \
142         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
143         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
144         new_j   = (int*)(new_a + new_nz); \
145         new_i   = new_j + new_nz; \
146  \
147         /* copy over old data into new slots */ \
148         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
149         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
150         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
151         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
152         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
153                                                            len*sizeof(int));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
156                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
157         /* free up old matrix storage */ \
158  \
159         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
160         if (!b->singlemalloc) { \
161           ierr = PetscFree(b->i);CHKERRQ(ierr); \
162           ierr = PetscFree(b->j);CHKERRQ(ierr); \
163         } \
164         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
165         b->singlemalloc = PETSC_TRUE; \
166  \
167         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
168         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
169         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
170         b->maxnz += CHUNKSIZE; \
171         b->reallocs++; \
172       } \
173       N = nrow++ - 1; b->nz++; \
174       /* shift up all the later entries in this row */ \
175       for (ii=N; ii>=_i; ii--) { \
176         rp[ii+1] = rp[ii]; \
177         ap[ii+1] = ap[ii]; \
178       } \
179       rp[_i] = col1;  \
180       ap[_i] = value;  \
181       b_noinsert: ; \
182       bilen[row] = nrow; \
183 }
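
/*
  Both macros above follow the same pattern, one for the diagonal (A) block and one
  for the off-diagonal (B) block: locate the column with a short binary search plus
  a linear scan, overwrite or add to the entry if it already exists, and otherwise
  insert a new nonzero.  When the row has no free slot, the whole block is
  reallocated with CHUNKSIZE extra entries and the row offsets i[] of all later
  rows are shifted by CHUNKSIZE.
*/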
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIAIJ"
187 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
190   PetscScalar  value;
191   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
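
/*
  Minimal user-level sketch (illustrative only; the matrix sizes, preallocation
  numbers, and indices are assumed) of the assembly path that ends up in
  MatSetValues_MPIAIJ() above.  Values destined for rows owned by another process
  are stashed and communicated during assembly:

      Mat         A;
      int         row = 0, col = 0, ierr;
      PetscScalar one = 1.0;

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,
                             5,PETSC_NULL,5,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/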
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
277   int        cstart = aij->cstart,cend = aij->cend,row,col;
278 
279   PetscFunctionBegin;
280   for (i=0; i<m; i++) {
281     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
282     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
283     if (idxm[i] >= rstart && idxm[i] < rend) {
284       row = idxm[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
287         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
288         if (idxn[j] >= cstart && idxn[j] < cend){
289           col = idxn[j] - cstart;
290           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
291         } else {
292           if (!aij->colmap) {
293             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
294           }
295 #if defined (PETSC_USE_CTABLE)
296           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
297           col --;
298 #else
299           col = aij->colmap[idxn[j]] - 1;
300 #endif
301           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
302           else {
303             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
304           }
305         }
306       }
307     } else {
308       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
309     }
310   }
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
316 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
317 {
318   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
319   int         ierr,nstash,reallocs;
320   InsertMode  addv;
321 
322   PetscFunctionBegin;
323   if (aij->donotstash) {
324     PetscFunctionReturn(0);
325   }
326 
327   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
328   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
329   if (addv == (ADD_VALUES|INSERT_VALUES)) {
330     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
331   }
332   mat->insertmode = addv; /* in case this processor had no cache */
333 
334   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
335   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
336   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
337   PetscFunctionReturn(0);
338 }
339 
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
343 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
344 {
345   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
346   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
347   int         i,j,rstart,ncols,n,ierr,flg;
348   int         *row,*col,other_disassembled;
349   PetscScalar *val;
350   InsertMode  addv = mat->insertmode;
351 
352   PetscFunctionBegin;
353   if (!aij->donotstash) {
354     while (1) {
355       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
356       if (!flg) break;
357 
358       for (i=0; i<n;) {
359         /* Now identify the consecutive vals belonging to the same row */
360         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
361         if (j < n) ncols = j-i;
362         else       ncols = n-i;
363         /* Now assemble all these values with a single function call */
364         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
365         i = j;
366       }
367     }
368     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
369   }
370 
371   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
372   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
373 
374   /* determine if any processor has disassembled; if so, we must
375      also disassemble ourselves so that we may reassemble. */
376   /*
377      if the nonzero structure of the submatrix B cannot change then we know that
378      no processor disassembled, so we can skip this step
379   */
380   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
381     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
382     if (mat->was_assembled && !other_disassembled) {
383       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
384     }
385   }
386 
387   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
388     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
389   }
390   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
391   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
392 
393   if (aij->rowvalues) {
394     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
395     aij->rowvalues = 0;
396   }
397 
398   /* used by MatAXPY() */
399   a->xtoy = 0; b->xtoy = 0;
400   a->XtoY = 0; b->XtoY = 0;
401 
402   PetscFunctionReturn(0);
403 }
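
/*
  Assembly flow: MatAssemblyBegin_MPIAIJ() above agrees on the insert mode and
  starts scattering the stashed off-process entries; MatAssemblyEnd_MPIAIJ()
  drains the stash with MatStashScatterGetMesg_Private(), re-inserts the values
  through MatSetValues_MPIAIJ(), assembles the two sequential blocks, and on the
  first final assembly sets up the off-processor communication pattern via
  MatSetUpMultiply_MPIAIJ().
*/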
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
407 int MatZeroEntries_MPIAIJ(Mat A)
408 {
409   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
410   int        ierr;
411 
412   PetscFunctionBegin;
413   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
414   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroRows_MPIAIJ"
420 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
421 {
422   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
423   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
424   int            *nprocs,j,idx,nsends,row;
425   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
426   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
427   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
428   MPI_Comm       comm = A->comm;
429   MPI_Request    *send_waits,*recv_waits;
430   MPI_Status     recv_status,*send_status;
431   IS             istmp;
432   PetscTruth     found;
433 
434   PetscFunctionBegin;
435   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
436   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
437 
438   /*  first count number of contributors to each processor */
439   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
440   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
441   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
442   for (i=0; i<N; i++) {
443     idx = rows[i];
444     found = PETSC_FALSE;
445     for (j=0; j<size; j++) {
446       if (idx >= owners[j] && idx < owners[j+1]) {
447         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
448       }
449     }
450     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
451   }
452   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
453 
454   /* inform other processors of number of messages and max length*/
455   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
456 
457   /* post receives:   */
458   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
459   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
460   for (i=0; i<nrecvs; i++) {
461     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
462   }
463 
464   /* do sends:
465       1) starts[i] gives the starting index in svalues for stuff going to
466          the ith processor
467   */
468   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
469   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
470   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
471   starts[0] = 0;
472   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
473   for (i=0; i<N; i++) {
474     svalues[starts[owner[i]]++] = rows[i];
475   }
476   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
477 
478   starts[0] = 0;
479   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
480   count = 0;
481   for (i=0; i<size; i++) {
482     if (nprocs[2*i+1]) {
483       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
484     }
485   }
486   ierr = PetscFree(starts);CHKERRQ(ierr);
487 
488   base = owners[rank];
489 
490   /*  wait on receives */
491   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
492   source = lens + nrecvs;
493   count  = nrecvs; slen = 0;
494   while (count) {
495     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
496     /* unpack receives into our local space */
497     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
498     source[imdex]  = recv_status.MPI_SOURCE;
499     lens[imdex]    = n;
500     slen          += n;
501     count--;
502   }
503   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
504 
505   /* convert the received global row indices into local numbering */
506   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
507   count = 0;
508   for (i=0; i<nrecvs; i++) {
509     values = rvalues + i*nmax;
510     for (j=0; j<lens[i]; j++) {
511       lrows[count++] = values[j] - base;
512     }
513   }
514   ierr = PetscFree(rvalues);CHKERRQ(ierr);
515   ierr = PetscFree(lens);CHKERRQ(ierr);
516   ierr = PetscFree(owner);CHKERRQ(ierr);
517   ierr = PetscFree(nprocs);CHKERRQ(ierr);
518 
519   /* actually zap the local rows */
520   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
521   PetscLogObjectParent(A,istmp);
522 
523   /*
524         Zero the required rows. If the "diagonal block" of the matrix
525      is square and the user wishes to set the diagonal we use separate
526      code so that MatSetValues() is not called for each diagonal entry,
527      which would allocate new memory, triggering many mallocs and slowing things down.
528 
529        Contributed by: Matthew Knepley
530   */
531   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
532   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
533   if (diag && (l->A->M == l->A->N)) {
534     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
535   } else if (diag) {
536     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
537     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
538       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
539 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
540     }
541     for (i = 0; i < slen; i++) {
542       row  = lrows[i] + rstart;
543       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
544     }
545     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547   } else {
548     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
549   }
550   ierr = ISDestroy(istmp);CHKERRQ(ierr);
551   ierr = PetscFree(lrows);CHKERRQ(ierr);
552 
553   /* wait on sends */
554   if (nsends) {
555     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
556     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
557     ierr = PetscFree(send_status);CHKERRQ(ierr);
558   }
559   ierr = PetscFree(send_waits);CHKERRQ(ierr);
560   ierr = PetscFree(svalues);CHKERRQ(ierr);
561 
562   PetscFunctionReturn(0);
563 }
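
/*
  Illustrative usage sketch (error checking omitted; assumes A is an already
  assembled parallel AIJ matrix): each process lists the global rows it wants
  zeroed, and MatZeroRows_MPIAIJ() above routes each row to its owner before
  zeroing, so the listed rows need not be locally owned:

      IS          is;
      int         rows[2] = {0,5},ierr;
      PetscScalar diag = 1.0;

      ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/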
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatMult_MPIAIJ"
567 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
568 {
569   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
570   int        ierr,nt;
571 
572   PetscFunctionBegin;
573   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
574   if (nt != A->n) {
575     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
576   }
577   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
578   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
579   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
580   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
581   PetscFunctionReturn(0);
582 }
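
/*
  In effect MatMult_MPIAIJ() computes yy = A_d*xx + B_o*lvec, where A_d is the
  local diagonal block, B_o the off-diagonal block, and lvec holds the ghost
  values of xx gathered by the Mvctx scatter; the scatter is started before the
  local multiply so communication overlaps computation.
*/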
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatMultAdd_MPIAIJ"
586 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
587 {
588   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
589   int        ierr;
590 
591   PetscFunctionBegin;
592   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
594   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
595   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
601 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
602 {
603   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
604   int        ierr;
605 
606   PetscFunctionBegin;
607   /* do nondiagonal part */
608   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
609   /* send it on its way */
610   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
611   /* do local part */
612   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
613   /* receive remote parts: note this assumes the values are not actually */
614   /* inserted into yy until the next line, which is true for this implementation */
615   /* but may not hold in general. */
616   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 EXTERN_C_BEGIN
621 #undef __FUNCT__
622 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
623 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
624 {
625   MPI_Comm comm;
626   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
627   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
628   IS         Me,Notme;
629   int        M,N,first,last,*notme,ntids,i, ierr;
630 
631   PetscFunctionBegin;
632 
633   /* Easy test: the diagonal blocks must be transposes of each other */
634   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
635   ierr = MatIsTranspose(Adia,Bdia,f); CHKERRQ(ierr);
636   if (!*f) PetscFunctionReturn(0);
637   ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr);
638   ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr);
639   if (ntids==1) PetscFunctionReturn(0);
640 
641   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
642   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
643   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
644   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
645   for (i=0; i<first; i++) notme[i] = i;
646   for (i=last; i<M; i++) notme[i-last+first] = i;
647   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
648   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
649   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
650   Aoff = Aoffs[0];
651   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
652   Boff = Boffs[0];
653   ierr = MatIsTranspose(Aoff,Boff,f); CHKERRQ(ierr);
654   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
655   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
656   ierr = ISDestroy(Me); CHKERRQ(ierr);
657   ierr = ISDestroy(Notme); CHKERRQ(ierr);
658 
659   PetscFunctionReturn(0);
660 }
661 EXTERN_C_END
662 
663 #undef __FUNCT__
664 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
665 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
666 {
667   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
668   int        ierr;
669 
670   PetscFunctionBegin;
671   /* do nondiagonal part */
672   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
673   /* send it on its way */
674   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
675   /* do local part */
676   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
677   /* receive remote parts: note this assumes the values are not actually */
678   /* inserted into zz until the next line, which is true for this implementation */
679   /* but may not hold in general. */
680   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 /*
685   This only works correctly for square matrices where the subblock A->A is the
686    diagonal block
687 */
688 #undef __FUNCT__
689 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
690 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
691 {
692   int        ierr;
693   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
694 
695   PetscFunctionBegin;
696   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
697   if (a->rstart != a->cstart || a->rend != a->cend) {
698     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
699   }
700   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatScale_MPIAIJ"
706 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
707 {
708   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
709   int        ierr;
710 
711   PetscFunctionBegin;
712   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
713   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
714   PetscFunctionReturn(0);
715 }
716 
717 #undef __FUNCT__
718 #define __FUNCT__ "MatDestroy_MPIAIJ"
719 int MatDestroy_MPIAIJ(Mat mat)
720 {
721   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
722   int        ierr;
723 
724   PetscFunctionBegin;
725 #if defined(PETSC_USE_LOG)
726   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
727 #endif
728   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
729   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
730   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
731   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
732 #if defined (PETSC_USE_CTABLE)
733   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
734 #else
735   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
736 #endif
737   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
738   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
739   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
740   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
741   ierr = PetscFree(aij);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatView_MPIAIJ_Binary"
747 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
748 {
749   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
750   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
751   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
752   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
753   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
754   PetscScalar       *column_values;
755 
756   PetscFunctionBegin;
757   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
758   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
759   nz   = A->nz + B->nz;
760   if (rank == 0) {
761     header[0] = MAT_FILE_COOKIE;
762     header[1] = mat->M;
763     header[2] = mat->N;
764     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
765     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
766     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
767     /* get largest number of rows any processor has */
768     rlen = mat->m;
769     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
770     for (i=1; i<size; i++) {
771       rlen = PetscMax(rlen,range[i+1] - range[i]);
772     }
773   } else {
774     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
775     rlen = mat->m;
776   }
777 
778   /* load up the local row counts */
779   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
780   for (i=0; i<mat->m; i++) {
781     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
782   }
783 
784   /* store the row lengths to the file */
785   if (rank == 0) {
786     MPI_Status status;
787     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
788     for (i=1; i<size; i++) {
789       rlen = range[i+1] - range[i];
790       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
791       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
792     }
793   } else {
794     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
795   }
796   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
797 
798   /* load up the local column indices */
799   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
800   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
801   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
802   cnt  = 0;
803   for (i=0; i<mat->m; i++) {
804     for (j=B->i[i]; j<B->i[i+1]; j++) {
805       if ( (col = garray[B->j[j]]) > cstart) break;
806       column_indices[cnt++] = col;
807     }
808     for (k=A->i[i]; k<A->i[i+1]; k++) {
809       column_indices[cnt++] = A->j[k] + cstart;
810     }
811     for (; j<B->i[i+1]; j++) {
812       column_indices[cnt++] = garray[B->j[j]];
813     }
814   }
815   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
816 
817   /* store the column indices to the file */
818   if (rank == 0) {
819     MPI_Status status;
820     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
821     for (i=1; i<size; i++) {
822       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
824       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
825       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
826     }
827   } else {
828     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
829     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
830   }
831   ierr = PetscFree(column_indices);CHKERRQ(ierr);
832 
833   /* load up the local column values */
834   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
835   cnt  = 0;
836   for (i=0; i<mat->m; i++) {
837     for (j=B->i[i]; j<B->i[i+1]; j++) {
838       if ( garray[B->j[j]] > cstart) break;
839       column_values[cnt++] = B->a[j];
840     }
841     for (k=A->i[i]; k<A->i[i+1]; k++) {
842       column_values[cnt++] = A->a[k];
843     }
844     for (; j<B->i[i+1]; j++) {
845       column_values[cnt++] = B->a[j];
846     }
847   }
848   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
849 
850   /* store the column values to the file */
851   if (rank == 0) {
852     MPI_Status status;
853     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
854     for (i=1; i<size; i++) {
855       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
857       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
858       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
859     }
860   } else {
861     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
862     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
863   }
864   ierr = PetscFree(column_values);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
870 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
871 {
872   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
873   int               ierr,rank = aij->rank,size = aij->size;
874   PetscTruth        isdraw,isascii,flg,isbinary;
875   PetscViewer       sviewer;
876   PetscViewerFormat format;
877 
878   PetscFunctionBegin;
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
881   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
882   if (isascii) {
883     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
884     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885       MatInfo info;
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
889       if (flg) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
891 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
894 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscFunctionReturn(0);
905     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
906       PetscFunctionReturn(0);
907     }
908   } else if (isbinary) {
909     if (size == 1) {
910       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
911       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
912     } else {
913       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
914     }
915     PetscFunctionReturn(0);
916   } else if (isdraw) {
917     PetscDraw  draw;
918     PetscTruth isnull;
919     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
920     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
921   }
922 
923   if (size == 1) {
924     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
925     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
926   } else {
927     /* assemble the entire matrix onto first processor. */
928     Mat         A;
929     Mat_SeqAIJ *Aloc;
930     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
931     PetscScalar *a;
932 
933     if (!rank) {
934       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
935     } else {
936       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
937     }
938     PetscLogObjectParent(mat,A);
939 
940     /* copy over the A part */
941     Aloc = (Mat_SeqAIJ*)aij->A->data;
942     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943     row = aij->rstart;
944     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
945     for (i=0; i<m; i++) {
946       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
947       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
948     }
949     aj = Aloc->j;
950     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
951 
952     /* copy over the B part */
953     Aloc = (Mat_SeqAIJ*)aij->B->data;
954     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955     row  = aij->rstart;
956     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
957     ct   = cols;
958     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
959     for (i=0; i<m; i++) {
960       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
961       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
962     }
963     ierr = PetscFree(ct);CHKERRQ(ierr);
964     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
965     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
966     /*
967        Everyone has to call to draw the matrix since the graphics waits are
968        synchronized across all processors that share the PetscDraw object
969     */
970     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
971     if (!rank) {
972       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
973       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
974     }
975     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
976     ierr = MatDestroy(A);CHKERRQ(ierr);
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatView_MPIAIJ"
983 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
984 {
985   int        ierr;
986   PetscTruth isascii,isdraw,issocket,isbinary;
987 
988   PetscFunctionBegin;
989   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
990   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
991   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
992   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
993   if (isascii || isdraw || isbinary || issocket) {
994     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
995   } else {
996     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatRelax_MPIAIJ"
1005 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1006 {
1007   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1008   int          ierr;
1009   Vec          bb1;
1010   PetscScalar  mone=-1.0;
1011 
1012   PetscFunctionBegin;
1013   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1014 
1015   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1016 
1017   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1018     if (flag & SOR_ZERO_INITIAL_GUESS) {
1019       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1020       its--;
1021     }
1022 
1023     while (its--) {
1024       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1025       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1026 
1027       /* update rhs: bb1 = bb - B*x */
1028       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1029       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1030 
1031       /* local sweep */
1032       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1033       CHKERRQ(ierr);
1034     }
1035   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1036     if (flag & SOR_ZERO_INITIAL_GUESS) {
1037       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1038       its--;
1039     }
1040     while (its--) {
1041       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1042       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1043 
1044       /* update rhs: bb1 = bb - B*x */
1045       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1046       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1047 
1048       /* local sweep */
1049       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1050       CHKERRQ(ierr);
1051     }
1052   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1053     if (flag & SOR_ZERO_INITIAL_GUESS) {
1054       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1055       its--;
1056     }
1057     while (its--) {
1058       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1059       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1060 
1061       /* update rhs: bb1 = bb - B*x */
1062       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1063       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1064 
1065       /* local sweep */
1066       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1067       CHKERRQ(ierr);
1068     }
1069   } else {
1070     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1071   }
1072 
1073   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
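
/*
  The SOR_LOCAL_* variants above are a block Jacobi/SOR hybrid: each outer
  iteration gathers the ghost values of xx into lvec, forms bb1 = bb - B*lvec
  (the VecScale by -1 folds the subtraction into the MultAdd), and then applies
  the requested local sweeps to the diagonal block only.  A true parallel SOR
  across processors is not supported.
*/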
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1079 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1080 {
1081   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1082   Mat        A = mat->A,B = mat->B;
1083   int        ierr;
1084   PetscReal  isend[5],irecv[5];
1085 
1086   PetscFunctionBegin;
1087   info->block_size     = 1.0;
1088   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1089   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1090   isend[3] = info->memory;  isend[4] = info->mallocs;
1091   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1092   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1093   isend[3] += info->memory;  isend[4] += info->mallocs;
1094   if (flag == MAT_LOCAL) {
1095     info->nz_used      = isend[0];
1096     info->nz_allocated = isend[1];
1097     info->nz_unneeded  = isend[2];
1098     info->memory       = isend[3];
1099     info->mallocs      = isend[4];
1100   } else if (flag == MAT_GLOBAL_MAX) {
1101     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1102     info->nz_used      = irecv[0];
1103     info->nz_allocated = irecv[1];
1104     info->nz_unneeded  = irecv[2];
1105     info->memory       = irecv[3];
1106     info->mallocs      = irecv[4];
1107   } else if (flag == MAT_GLOBAL_SUM) {
1108     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1109     info->nz_used      = irecv[0];
1110     info->nz_allocated = irecv[1];
1111     info->nz_unneeded  = irecv[2];
1112     info->memory       = irecv[3];
1113     info->mallocs      = irecv[4];
1114   }
1115   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1116   info->fill_ratio_needed = 0;
1117   info->factor_mallocs    = 0;
1118   info->rows_global       = (double)matin->M;
1119   info->columns_global    = (double)matin->N;
1120   info->rows_local        = (double)matin->m;
1121   info->columns_local     = (double)matin->N;
1122 
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatSetOption_MPIAIJ"
1128 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1129 {
1130   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1131   int        ierr;
1132 
1133   PetscFunctionBegin;
1134   switch (op) {
1135   case MAT_NO_NEW_NONZERO_LOCATIONS:
1136   case MAT_YES_NEW_NONZERO_LOCATIONS:
1137   case MAT_COLUMNS_UNSORTED:
1138   case MAT_COLUMNS_SORTED:
1139   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1140   case MAT_KEEP_ZEROED_ROWS:
1141   case MAT_NEW_NONZERO_LOCATION_ERR:
1142   case MAT_USE_INODES:
1143   case MAT_DO_NOT_USE_INODES:
1144   case MAT_IGNORE_ZERO_ENTRIES:
1145     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1146     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1147     break;
1148   case MAT_ROW_ORIENTED:
1149     a->roworiented = PETSC_TRUE;
1150     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1151     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1152     break;
1153   case MAT_ROWS_SORTED:
1154   case MAT_ROWS_UNSORTED:
1155   case MAT_YES_NEW_DIAGONALS:
1156     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1157     break;
1158   case MAT_COLUMN_ORIENTED:
1159     a->roworiented = PETSC_FALSE;
1160     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1161     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1162     break;
1163   case MAT_IGNORE_OFF_PROC_ENTRIES:
1164     a->donotstash = PETSC_TRUE;
1165     break;
1166   case MAT_NO_NEW_DIAGONALS:
1167     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1168   case MAT_SYMMETRIC:
1169   case MAT_STRUCTURALLY_SYMMETRIC:
1170   case MAT_NOT_SYMMETRIC:
1171   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1172   case MAT_HERMITIAN:
1173   case MAT_NOT_HERMITIAN:
1174   case MAT_SYMMETRY_ETERNAL:
1175   case MAT_NOT_SYMMETRY_ETERNAL:
1176     break;
1177   default:
1178     SETERRQ(PETSC_ERR_SUP,"unknown option");
1179   }
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatGetRow_MPIAIJ"
1185 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1186 {
1187   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1188   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1189   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1190   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1191   int          *cmap,*idx_p;
1192 
1193   PetscFunctionBegin;
1194   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1195   mat->getrowactive = PETSC_TRUE;
1196 
1197   if (!mat->rowvalues && (idx || v)) {
1198     /*
1199         allocate enough space to hold information from the longest row.
1200     */
1201     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1202     int     max = 1,tmp;
1203     for (i=0; i<matin->m; i++) {
1204       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1205       if (max < tmp) { max = tmp; }
1206     }
1207     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1208     mat->rowindices = (int*)(mat->rowvalues + max);
1209   }
1210 
1211   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1212   lrow = row - rstart;
1213 
1214   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1215   if (!v)   {pvA = 0; pvB = 0;}
1216   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1217   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1218   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1219   nztot = nzA + nzB;
1220 
1221   cmap  = mat->garray;
1222   if (v  || idx) {
1223     if (nztot) {
1224       /* Sort by increasing column numbers, assuming A and B already sorted */
1225       int imark = -1;
1226       if (v) {
1227         *v = v_p = mat->rowvalues;
1228         for (i=0; i<nzB; i++) {
1229           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1230           else break;
1231         }
1232         imark = i;
1233         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1234         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1235       }
1236       if (idx) {
1237         *idx = idx_p = mat->rowindices;
1238         if (imark > -1) {
1239           for (i=0; i<imark; i++) {
1240             idx_p[i] = cmap[cworkB[i]];
1241           }
1242         } else {
1243           for (i=0; i<nzB; i++) {
1244             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1245             else break;
1246           }
1247           imark = i;
1248         }
1249         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1250         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1251       }
1252     } else {
1253       if (idx) *idx = 0;
1254       if (v)   *v   = 0;
1255     }
1256   }
1257   *nz = nztot;
1258   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1259   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1265 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1266 {
1267   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1268 
1269   PetscFunctionBegin;
1270   if (aij->getrowactive == PETSC_FALSE) {
1271     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1272   }
1273   aij->getrowactive = PETSC_FALSE;
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatNorm_MPIAIJ"
1279 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1280 {
1281   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1282   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1283   int          ierr,i,j,cstart = aij->cstart;
1284   PetscReal    sum = 0.0;
1285   PetscScalar  *v;
1286 
1287   PetscFunctionBegin;
1288   if (aij->size == 1) {
1289     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1290   } else {
1291     if (type == NORM_FROBENIUS) {
1292       v = amat->a;
1293       for (i=0; i<amat->nz; i++) {
1294 #if defined(PETSC_USE_COMPLEX)
1295         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1296 #else
1297         sum += (*v)*(*v); v++;
1298 #endif
1299       }
1300       v = bmat->a;
1301       for (i=0; i<bmat->nz; i++) {
1302 #if defined(PETSC_USE_COMPLEX)
1303         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1304 #else
1305         sum += (*v)*(*v); v++;
1306 #endif
1307       }
1308       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1309       *norm = sqrt(*norm);
1310     } else if (type == NORM_1) { /* max column norm */
1311       PetscReal *tmp,*tmp2;
1312       int    *jj,*garray = aij->garray;
1313       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1314       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1315       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1316       *norm = 0.0;
1317       v = amat->a; jj = amat->j;
1318       for (j=0; j<amat->nz; j++) {
1319         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1320       }
1321       v = bmat->a; jj = bmat->j;
1322       for (j=0; j<bmat->nz; j++) {
1323         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1324       }
1325       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1326       for (j=0; j<mat->N; j++) {
1327         if (tmp2[j] > *norm) *norm = tmp2[j];
1328       }
1329       ierr = PetscFree(tmp);CHKERRQ(ierr);
1330       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1331     } else if (type == NORM_INFINITY) { /* max row norm */
1332       PetscReal ntemp = 0.0;
1333       for (j=0; j<aij->A->m; j++) {
1334         v = amat->a + amat->i[j];
1335         sum = 0.0;
1336         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1337           sum += PetscAbsScalar(*v); v++;
1338         }
1339         v = bmat->a + bmat->i[j];
1340         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1341           sum += PetscAbsScalar(*v); v++;
1342         }
1343         if (sum > ntemp) ntemp = sum;
1344       }
1345       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1346     } else {
1347       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1348     }
1349   }
1350   PetscFunctionReturn(0);
1351 }
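
/*
  Norms computed above: NORM_FROBENIUS sums |a_ij|^2 over both local blocks and
  takes the square root of the MPI_SUM reduction; NORM_1 (max column sum)
  accumulates absolute values into a length-N array (off-diagonal columns mapped
  through garray), MPI_SUM-reduces it, and takes the largest entry; NORM_INFINITY
  (max row sum) takes the MPI_MAX of the local maximum row sums.
*/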
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatTranspose_MPIAIJ"
1355 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1356 {
1357   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1358   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1359   int          ierr;
1360   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1361   Mat          B;
1362   PetscScalar  *array;
1363 
1364   PetscFunctionBegin;
1365   if (!matout && M != N) {
1366     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1367   }
1368 
1369   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1370 
1371   /* copy over the A part */
1372   Aloc = (Mat_SeqAIJ*)a->A->data;
1373   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1374   row = a->rstart;
1375   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1376   for (i=0; i<m; i++) {
1377     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1378     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1379   }
1380   aj = Aloc->j;
1381   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1382 
1383   /* copy over the B part */
1384   Aloc = (Mat_SeqAIJ*)a->B->data;
1385   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1386   row  = a->rstart;
1387   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1388   ct   = cols;
1389   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1390   for (i=0; i<m; i++) {
1391     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1392     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1393   }
1394   ierr = PetscFree(ct);CHKERRQ(ierr);
1395   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1396   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1397   if (matout) {
1398     *matout = B;
1399   } else {
1400     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1407 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1408 {
1409   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1410   Mat        a = aij->A,b = aij->B;
1411   int        ierr,s1,s2,s3;
1412 
1413   PetscFunctionBegin;
1414   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1415   if (rr) {
1416     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1417     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1418     /* Overlap communication with computation. */
1419     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1420   }
1421   if (ll) {
1422     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1423     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1424     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1425   }
1426   /* scale the diagonal block */
1427   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1428 
1429   if (rr) {
1430     /* Do a scatter end and then right scale the off-diagonal block */
1431     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1432     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1433   }
1434 
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1441 int MatPrintHelp_MPIAIJ(Mat A)
1442 {
1443   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1444   int        ierr;
1445 
1446   PetscFunctionBegin;
1447   if (!a->rank) {
1448     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1449   }
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 #undef __FUNCT__
1454 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1455 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1456 {
1457   PetscFunctionBegin;
1458   *bs = 1;
1459   PetscFunctionReturn(0);
1460 }
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1463 int MatSetUnfactored_MPIAIJ(Mat A)
1464 {
1465   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1466   int        ierr;
1467 
1468   PetscFunctionBegin;
1469   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatEqual_MPIAIJ"
1475 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1476 {
1477   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1478   Mat        a,b,c,d;
1479   PetscTruth flg;
1480   int        ierr;
1481 
1482   PetscFunctionBegin;
1483   a = matA->A; b = matA->B;
1484   c = matB->A; d = matB->B;
1485 
1486   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1487   if (flg == PETSC_TRUE) {
1488     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1489   }
1490   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatCopy_MPIAIJ"
1496 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1497 {
1498   int        ierr;
1499   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1500   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1501 
1502   PetscFunctionBegin;
1503   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1504   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1505     /* Because of the column compression in the off-processor part of the matrix a->B,
1506        the number of columns in a->B and b->B may differ; hence we cannot call
1507        MatCopy() directly on the two parts. If need be, a copy more efficient than
1508        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1509        then copying the submatrices. */
1510     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1511   } else {
1512     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1513     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1514   }
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1520 int MatSetUpPreallocation_MPIAIJ(Mat A)
1521 {
1522   int        ierr;
1523 
1524   PetscFunctionBegin;
1525   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 #include "petscblaslapack.h"
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatAXPY_MPIAIJ"
1532 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1533 {
1534   int        ierr,one=1,i;
1535   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1536   Mat_SeqAIJ *x,*y;
1537 
1538   PetscFunctionBegin;
1539   if (str == SAME_NONZERO_PATTERN) {
1540     x = (Mat_SeqAIJ *)xx->A->data;
1541     y = (Mat_SeqAIJ *)yy->A->data;
1542     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1543     x = (Mat_SeqAIJ *)xx->B->data;
1544     y = (Mat_SeqAIJ *)yy->B->data;
1545     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1546   } else if (str == SUBSET_NONZERO_PATTERN) {
1547     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1548 
1549     x = (Mat_SeqAIJ *)xx->B->data;
1550     y = (Mat_SeqAIJ *)yy->B->data;
1551     if (y->xtoy && y->XtoY != xx->B) {
1552       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1553       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1554     }
1555     if (!y->xtoy) { /* get xtoy */
1556       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1557       y->XtoY = xx->B;
1558     }
1559     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1560   } else {
1561     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 /* -------------------------------------------------------------------*/
1567 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1568        MatGetRow_MPIAIJ,
1569        MatRestoreRow_MPIAIJ,
1570        MatMult_MPIAIJ,
1571 /* 4*/ MatMultAdd_MPIAIJ,
1572        MatMultTranspose_MPIAIJ,
1573        MatMultTransposeAdd_MPIAIJ,
1574        0,
1575        0,
1576        0,
1577 /*10*/ 0,
1578        0,
1579        0,
1580        MatRelax_MPIAIJ,
1581        MatTranspose_MPIAIJ,
1582 /*15*/ MatGetInfo_MPIAIJ,
1583        MatEqual_MPIAIJ,
1584        MatGetDiagonal_MPIAIJ,
1585        MatDiagonalScale_MPIAIJ,
1586        MatNorm_MPIAIJ,
1587 /*20*/ MatAssemblyBegin_MPIAIJ,
1588        MatAssemblyEnd_MPIAIJ,
1589        0,
1590        MatSetOption_MPIAIJ,
1591        MatZeroEntries_MPIAIJ,
1592 /*25*/ MatZeroRows_MPIAIJ,
1593 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1594        MatLUFactorSymbolic_MPIAIJ_TFS,
1595 #else
1596        0,
1597 #endif
1598        0,
1599        0,
1600        0,
1601 /*30*/ MatSetUpPreallocation_MPIAIJ,
1602        0,
1603        0,
1604        0,
1605        0,
1606 /*35*/ MatDuplicate_MPIAIJ,
1607        0,
1608        0,
1609        0,
1610        0,
1611 /*40*/ MatAXPY_MPIAIJ,
1612        MatGetSubMatrices_MPIAIJ,
1613        MatIncreaseOverlap_MPIAIJ,
1614        MatGetValues_MPIAIJ,
1615        MatCopy_MPIAIJ,
1616 /*45*/ MatPrintHelp_MPIAIJ,
1617        MatScale_MPIAIJ,
1618        0,
1619        0,
1620        0,
1621 /*50*/ MatGetBlockSize_MPIAIJ,
1622        0,
1623        0,
1624        0,
1625        0,
1626 /*55*/ MatFDColoringCreate_MPIAIJ,
1627        0,
1628        MatSetUnfactored_MPIAIJ,
1629        0,
1630        0,
1631 /*60*/ MatGetSubMatrix_MPIAIJ,
1632        MatDestroy_MPIAIJ,
1633        MatView_MPIAIJ,
1634        MatGetPetscMaps_Petsc,
1635        0,
1636 /*65*/ 0,
1637        0,
1638        0,
1639        0,
1640        0,
1641 /*70*/ 0,
1642        0,
1643        MatSetColoring_MPIAIJ,
1644        MatSetValuesAdic_MPIAIJ,
1645        MatSetValuesAdifor_MPIAIJ,
1646 /*75*/ 0,
1647        0,
1648        0,
1649        0,
1650        0,
1651 /*80*/ 0,
1652        0,
1653        0,
1654        0,
1655        0,
1656 /*85*/ MatLoad_MPIAIJ};
1657 
1658 /* ----------------------------------------------------------------------------------------*/
1659 
1660 EXTERN_C_BEGIN
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1663 int MatStoreValues_MPIAIJ(Mat mat)
1664 {
1665   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1666   int        ierr;
1667 
1668   PetscFunctionBegin;
1669   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1670   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 EXTERN_C_END
1674 
1675 EXTERN_C_BEGIN
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1678 int MatRetrieveValues_MPIAIJ(Mat mat)
1679 {
1680   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1681   int        ierr;
1682 
1683   PetscFunctionBegin;
1684   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1685   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 EXTERN_C_END
1689 
1690 #include "petscpc.h"
1691 EXTERN_C_BEGIN
1692 #undef __FUNCT__
1693 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1694 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1695 {
1696   Mat_MPIAIJ   *b;
1697   int          ierr,i;
1698 
1699   PetscFunctionBegin;
1700   B->preallocated = PETSC_TRUE;
1701   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1702   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1703   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1704   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1705   if (d_nnz) {
1706     for (i=0; i<B->m; i++) {
1707       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1708     }
1709   }
1710   if (o_nnz) {
1711     for (i=0; i<B->m; i++) {
1712       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1713     }
1714   }
1715   b = (Mat_MPIAIJ*)B->data;
1716 
1717   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1718   PetscLogObjectParent(B,b->A);
1719   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1720   PetscLogObjectParent(B,b->B);
1721 
1722   PetscFunctionReturn(0);
1723 }
1724 EXTERN_C_END
1725 
1726 /*MC
1727    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1728 
1729    Options Database Keys:
1730 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1731 
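   Example usage (a minimal sketch; error checking omitted, and A, comm, and the
   sizes m,n,M,N and counts d_nz,o_nz are placeholders):
.vb
     MatCreate(comm,m,n,M,N,&A);
     MatSetFromOptions(A);                           /* -mat_type mpiaij selects this type */
     MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
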
1732   Level: beginner
1733 
1734 .seealso: MatCreateMPIAIJ
1735 M*/
1736 
1737 EXTERN_C_BEGIN
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatCreate_MPIAIJ"
1740 int MatCreate_MPIAIJ(Mat B)
1741 {
1742   Mat_MPIAIJ *b;
1743   int        ierr,i,size;
1744 
1745   PetscFunctionBegin;
1746   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1747 
1748   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1749   B->data         = (void*)b;
1750   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1751   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1752   B->factor       = 0;
1753   B->assembled    = PETSC_FALSE;
1754   B->mapping      = 0;
1755 
1756   B->insertmode      = NOT_SET_VALUES;
1757   b->size            = size;
1758   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1759 
1760   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1761   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1762 
1763   /* the information in the maps duplicates the information computed below, eventually
1764      we should remove the duplicate information that is not contained in the maps */
1765   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1766   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1767 
1768   /* build local table of row and column ownerships */
1769   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1770   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1771   b->cowners = b->rowners + b->size + 2;
1772   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1773   b->rowners[0] = 0;
1774   for (i=2; i<=b->size; i++) {
1775     b->rowners[i] += b->rowners[i-1];
1776   }
1777   b->rstart = b->rowners[b->rank];
1778   b->rend   = b->rowners[b->rank+1];
1779   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1780   b->cowners[0] = 0;
1781   for (i=2; i<=b->size; i++) {
1782     b->cowners[i] += b->cowners[i-1];
1783   }
1784   b->cstart = b->cowners[b->rank];
1785   b->cend   = b->cowners[b->rank+1];
1786 
1787   /* build cache (stash) for entries destined for rows owned by other processes */
1788   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1789   b->donotstash  = PETSC_FALSE;
1790   b->colmap      = 0;
1791   b->garray      = 0;
1792   b->roworiented = PETSC_TRUE;
1793 
1794   /* stuff used for matrix vector multiply */
1795   b->lvec      = PETSC_NULL;
1796   b->Mvctx     = PETSC_NULL;
1797 
1798   /* stuff for MatGetRow() */
1799   b->rowindices   = 0;
1800   b->rowvalues    = 0;
1801   b->getrowactive = PETSC_FALSE;
1802 
1803   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1804                                      "MatStoreValues_MPIAIJ",
1805                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1807                                      "MatRetrieveValues_MPIAIJ",
1808                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1810 				     "MatGetDiagonalBlock_MPIAIJ",
1811                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1813 				     "MatIsTranspose_MPIAIJ",
1814 				     MatIsTranspose_MPIAIJ); CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1816 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1817 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1819 				     "MatDiagonalScaleLocal_MPIAIJ",
1820 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 EXTERN_C_END
1824 
1825 /*MC
1826    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1827 
1828    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1829    and MATMPIAIJ otherwise.
1830 
1831    Options Database Keys:
1832 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1833 
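   Example usage (a minimal sketch; error checking omitted, and A, comm, and the
   sizes m,n,M,N are placeholders):
.vb
     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATAIJ);     /* MATSEQAIJ on one process, MATMPIAIJ otherwise */
.ve
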
1834   Level: beginner
1835 
1836 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1837 M*/
1838 
1839 EXTERN_C_BEGIN
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatCreate_AIJ"
1842 int MatCreate_AIJ(Mat A) {
1843   int ierr,size;
1844 
1845   PetscFunctionBegin;
1846   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1847   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1848   if (size == 1) {
1849     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1850   } else {
1851     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1852   }
1853   PetscFunctionReturn(0);
1854 }
1855 EXTERN_C_END
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1859 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1860 {
1861   Mat        mat;
1862   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1863   int        ierr;
1864 
1865   PetscFunctionBegin;
1866   *newmat       = 0;
1867   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1868   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1869   a    = (Mat_MPIAIJ*)mat->data;
1870   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1871   mat->factor       = matin->factor;
1872   mat->assembled    = PETSC_TRUE;
1873   mat->insertmode   = NOT_SET_VALUES;
1874   mat->preallocated = PETSC_TRUE;
1875 
1876   a->rstart       = oldmat->rstart;
1877   a->rend         = oldmat->rend;
1878   a->cstart       = oldmat->cstart;
1879   a->cend         = oldmat->cend;
1880   a->size         = oldmat->size;
1881   a->rank         = oldmat->rank;
1882   a->donotstash   = oldmat->donotstash;
1883   a->roworiented  = oldmat->roworiented;
1884   a->rowindices   = 0;
1885   a->rowvalues    = 0;
1886   a->getrowactive = PETSC_FALSE;
1887 
1888   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1889   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1890   if (oldmat->colmap) {
1891 #if defined (PETSC_USE_CTABLE)
1892     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1893 #else
1894     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1895     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1896     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1897 #endif
1898   } else a->colmap = 0;
1899   if (oldmat->garray) {
1900     int len;
1901     len  = oldmat->B->n;
1902     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1903     PetscLogObjectMemory(mat,len*sizeof(int));
1904     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1905   } else a->garray = 0;
1906 
1907   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1908   PetscLogObjectParent(mat,a->lvec);
1909   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1910   PetscLogObjectParent(mat,a->Mvctx);
1911   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1912   PetscLogObjectParent(mat,a->A);
1913   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1914   PetscLogObjectParent(mat,a->B);
1915   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1916   *newmat = mat;
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #include "petscsys.h"
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatLoad_MPIAIJ"
1924 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1925 {
1926   Mat          A;
1927   PetscScalar  *vals,*svals;
1928   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1929   MPI_Status   status;
1930   int          i,nz,ierr,j,rstart,rend,fd;
1931   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1932   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1933   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1934 
1935   PetscFunctionBegin;
1936   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1937   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1938   if (!rank) {
1939     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1940     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1941     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1942     if (header[3] < 0) {
1943       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1944     }
1945   }
1946 
1947   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1948   M = header[1]; N = header[2];
1949   /* determine ownership of all rows */
1950   m = M/size + ((M % size) > rank);
1951   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1952   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1953   rowners[0] = 0;
1954   for (i=2; i<=size; i++) {
1955     rowners[i] += rowners[i-1];
1956   }
1957   rstart = rowners[rank];
1958   rend   = rowners[rank+1];
1959 
1960   /* distribute row lengths to all processors */
1961   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1962   offlens = ourlens + (rend-rstart);
1963   if (!rank) {
1964     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1965     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1966     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1967     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1968     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1969     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1970   } else {
1971     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1972   }
1973 
1974   if (!rank) {
1975     /* calculate the number of nonzeros on each processor */
1976     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1977     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1978     for (i=0; i<size; i++) {
1979       for (j=rowners[i]; j< rowners[i+1]; j++) {
1980         procsnz[i] += rowlengths[j];
1981       }
1982     }
1983     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1984 
1985     /* determine max buffer needed and allocate it */
1986     maxnz = 0;
1987     for (i=0; i<size; i++) {
1988       maxnz = PetscMax(maxnz,procsnz[i]);
1989     }
1990     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1991 
1992     /* read in my part of the matrix column indices  */
1993     nz   = procsnz[0];
1994     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1995     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1996 
1997     /* read in everyone else's and ship them off */
1998     for (i=1; i<size; i++) {
1999       nz   = procsnz[i];
2000       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2001       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2002     }
2003     ierr = PetscFree(cols);CHKERRQ(ierr);
2004   } else {
2005     /* determine buffer space needed for message */
2006     nz = 0;
2007     for (i=0; i<m; i++) {
2008       nz += ourlens[i];
2009     }
2010     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2011 
2012     /* receive message of column indices*/
2013     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2014     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2015     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2016   }
2017 
2018   /* determine column ownership if matrix is not square */
2019   if (N != M) {
2020     n      = N/size + ((N % size) > rank);
2021     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2022     cstart = cend - n;
2023   } else {
2024     cstart = rstart;
2025     cend   = rend;
2026     n      = cend - cstart;
2027   }
2028 
2029   /* loop over local rows, determining number of off diagonal entries */
2030   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2031   jj = 0;
2032   for (i=0; i<m; i++) {
2033     for (j=0; j<ourlens[i]; j++) {
2034       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2035       jj++;
2036     }
2037   }
2038 
2039   /* create our matrix */
2040   for (i=0; i<m; i++) {
2041     ourlens[i] -= offlens[i];
2042   }
2043   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2044   ierr = MatSetType(A,type);CHKERRQ(ierr);
2045   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2046 
2047   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2048   for (i=0; i<m; i++) {
2049     ourlens[i] += offlens[i];
2050   }
2051 
2052   if (!rank) {
2053     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2054 
2055     /* read in my part of the matrix numerical values  */
2056     nz   = procsnz[0];
2057     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2058 
2059     /* insert into matrix */
2060     jj      = rstart;
2061     smycols = mycols;
2062     svals   = vals;
2063     for (i=0; i<m; i++) {
2064       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2065       smycols += ourlens[i];
2066       svals   += ourlens[i];
2067       jj++;
2068     }
2069 
2070     /* read in other processors and ship out */
2071     for (i=1; i<size; i++) {
2072       nz   = procsnz[i];
2073       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2074       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2075     }
2076     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2077   } else {
2078     /* receive numeric values */
2079     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2080 
2081     /* receive message of values*/
2082     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2083     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2084     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2085 
2086     /* insert into matrix */
2087     jj      = rstart;
2088     smycols = mycols;
2089     svals   = vals;
2090     for (i=0; i<m; i++) {
2091       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2092       smycols += ourlens[i];
2093       svals   += ourlens[i];
2094       jj++;
2095     }
2096   }
2097   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2098   ierr = PetscFree(vals);CHKERRQ(ierr);
2099   ierr = PetscFree(mycols);CHKERRQ(ierr);
2100   ierr = PetscFree(rowners);CHKERRQ(ierr);
2101 
2102   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2103   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2104   *newmat = A;
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2110 /*
2111     Not ideal since it makes two copies of the submatrix: first a local SeqAIJ
2112   matrix, and then the final result formed by concatenating the local matrices.
2113   Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2114 */
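/*
   Usage sketch (illustrative only; error checking omitted): the first call builds
   the submatrix and caches the intermediate sequential matrix as the composed
   object "SubMatrix"; later calls with MAT_REUSE_MATRIX reuse that cache.

     MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);
     ...
     MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);
*/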
2115 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2116 {
2117   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2118   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2119   Mat          *local,M,Mreuse;
2120   PetscScalar  *vwork,*aa;
2121   MPI_Comm     comm = mat->comm;
2122   Mat_SeqAIJ   *aij;
2123 
2124 
2125   PetscFunctionBegin;
2126   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2127   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2128 
2129   if (call ==  MAT_REUSE_MATRIX) {
2130     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2131     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2132     local = &Mreuse;
2133     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2134   } else {
2135     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2136     Mreuse = *local;
2137     ierr   = PetscFree(local);CHKERRQ(ierr);
2138   }
2139 
2140   /*
2141       m - number of local rows
2142       n - number of columns (same on all processors)
2143       rstart - first row in new global matrix generated
2144   */
2145   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2146   if (call == MAT_INITIAL_MATRIX) {
2147     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2148     ii  = aij->i;
2149     jj  = aij->j;
2150 
2151     /*
2152         Determine the number of non-zeros in the diagonal and off-diagonal
2153         portions of the matrix in order to do correct preallocation
2154     */
2155 
2156     /* first get start and end of "diagonal" columns */
2157     if (csize == PETSC_DECIDE) {
2158       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2159       if (mglobal == n) { /* square matrix */
2160         nlocal = m;
2161       } else {
2162         nlocal = n/size + ((n % size) > rank);
2163       }
2164     } else {
2165       nlocal = csize;
2166     }
2167     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2168     rstart = rend - nlocal;
2169     if (rank == size - 1 && rend != n) {
2170       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2171     }
2172 
2173     /* next, compute all the lengths */
2174     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2175     olens = dlens + m;
2176     for (i=0; i<m; i++) {
2177       jend = ii[i+1] - ii[i];
2178       olen = 0;
2179       dlen = 0;
2180       for (j=0; j<jend; j++) {
2181         if (*jj < rstart || *jj >= rend) olen++;
2182         else dlen++;
2183         jj++;
2184       }
2185       olens[i] = olen;
2186       dlens[i] = dlen;
2187     }
2188     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2189     ierr = PetscFree(dlens);CHKERRQ(ierr);
2190   } else {
2191     int ml,nl;
2192 
2193     M = *newmat;
2194     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2195     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2196     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2197     /*
2198          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2199        rather than the slower MatSetValues().
2200     */
2201     M->was_assembled = PETSC_TRUE;
2202     M->assembled     = PETSC_FALSE;
2203   }
2204   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2205   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2206   ii  = aij->i;
2207   jj  = aij->j;
2208   aa  = aij->a;
2209   for (i=0; i<m; i++) {
2210     row   = rstart + i;
2211     nz    = ii[i+1] - ii[i];
2212     cwork = jj;     jj += nz;
2213     vwork = aa;     aa += nz;
2214     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2215   }
2216 
2217   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2218   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2219   *newmat = M;
2220 
2221   /* save submatrix used in processor for next request */
2222   if (call ==  MAT_INITIAL_MATRIX) {
2223     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2224     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2225   }
2226 
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 #undef __FUNCT__
2231 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2232 /*@C
2233    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2234    (the default parallel PETSc format).  For good matrix assembly performance
2235    the user should preallocate the matrix storage by setting the parameters
2236    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2237    performance can be increased by more than a factor of 50.
2238 
2239    Collective on MPI_Comm
2240 
2241    Input Parameters:
2242 +  A - the matrix
2243 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2244            (same value is used for all local rows)
2245 .  d_nnz - array containing the number of nonzeros in the various rows of the
2246            DIAGONAL portion of the local submatrix (possibly different for each row)
2247            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2248            The size of this array is equal to the number of local rows, i.e. 'm'.
2249            You must leave room for the diagonal entry even if it is zero.
2250 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2251            submatrix (same value is used for all local rows).
2252 -  o_nnz - array containing the number of nonzeros in the various rows of the
2253            OFF-DIAGONAL portion of the local submatrix (possibly different for
2254            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2255            structure. The size of this array is equal to the number
2256            of local rows, i.e. 'm'.
2257 
2258    The AIJ format (also called the Yale sparse matrix format or
2259    compressed row storage) is fully compatible with standard Fortran 77
2260    storage.  That is, the stored row and column indices can begin at
2261    either one (as in Fortran) or zero.  See the users manual for details.
2262 
2263    The user MUST specify either the local or global matrix dimensions
2264    (possibly both).
2265 
2266    The parallel matrix is partitioned such that the first m0 rows belong to
2267    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2268    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2269 
2270    The DIAGONAL portion of the local submatrix of a processor can be defined
2271    as the submatrix which is obtained by extracting the part corresponding
2272    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2273    first row that belongs to the processor, and r2 is the last row belonging
2274    to this processor. This is a square mxm matrix. The remaining portion
2275    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2276 
2277    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2278 
2279    By default, this format uses inodes (identical nodes) when possible.
2280    We search for consecutive rows with the same nonzero structure, thereby
2281    reusing matrix information to achieve increased efficiency.
2282 
2283    Options Database Keys:
2284 +  -mat_aij_no_inode  - Do not use inodes
2285 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2286 -  -mat_aij_oneindex - Internally use indexing starting at 1
2287         rather than 0.  Note that when calling MatSetValues(),
2288         the user still MUST index entries starting at 0!
2289 
2290    Example usage:
2291 
2292    Consider the following 8x8 matrix with 34 nonzero values that is
2293    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2294    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2295    as follows:
2296 
2297 .vb
2298             1  2  0  |  0  3  0  |  0  4
2299     Proc0   0  5  6  |  7  0  0  |  8  0
2300             9  0 10  | 11  0  0  | 12  0
2301     -------------------------------------
2302            13  0 14  | 15 16 17  |  0  0
2303     Proc1   0 18  0  | 19 20 21  |  0  0
2304             0  0  0  | 22 23  0  | 24  0
2305     -------------------------------------
2306     Proc2  25 26 27  |  0  0 28  | 29  0
2307            30  0  0  | 31 32 33  |  0 34
2308 .ve
2309 
2310    This can be represented as a collection of submatrices as:
2311 
2312 .vb
2313       A B C
2314       D E F
2315       G H I
2316 .ve
2317 
2318    Where the submatrices A,B,C are owned by proc0, D,E,F are
2319    owned by proc1, G,H,I are owned by proc2.
2320 
2321    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2322    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2323    The 'M','N' parameters are 8,8, and have the same values on all procs.
2324 
2325    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2326    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2327    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2328    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2329    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2330    matrix, and [DF] as another SeqAIJ matrix.
2331 
2332    When d_nz, o_nz parameters are specified, d_nz storage elements are
2333    allocated for every row of the local diagonal submatrix, and o_nz
2334    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2335    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2336    local row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2337    In this case, the values of d_nz,o_nz are:
2338 .vb
2339      proc0 : dnz = 2, o_nz = 2
2340      proc1 : dnz = 3, o_nz = 2
2341      proc2 : dnz = 1, o_nz = 4
2342 .ve
2343    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2344    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2345    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2346    34 values.
2347 
2348    When d_nnz, o_nnz parameters are specified, the storage is specified
2349    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2350    In the above case the values for d_nnz,o_nnz are:
2351 .vb
2352      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2353      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2354      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2355 .ve
2356    Here the space allocated is the sum of all the above values, i.e. 34, and
2357    hence the preallocation is exact.
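
   A minimal sketch of the corresponding calls for proc0 in the example above
   (error checking omitted; comm and A are placeholders):
.vb
     int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     MatCreate(comm,3,3,8,8,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve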
2358 
2359    Level: intermediate
2360 
2361 .keywords: matrix, aij, compressed row, sparse, parallel
2362 
2363 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2364 @*/
2365 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2366 {
2367   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2368 
2369   PetscFunctionBegin;
2370   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2371   if (f) {
2372     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2373   }
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatCreateMPIAIJ"
2379 /*@C
2380    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2381    (the default parallel PETSc format).  For good matrix assembly performance
2382    the user should preallocate the matrix storage by setting the parameters
2383    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2384    performance can be increased by more than a factor of 50.
2385 
2386    Collective on MPI_Comm
2387 
2388    Input Parameters:
2389 +  comm - MPI communicator
2390 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2391            This value should be the same as the local size used in creating the
2392            y vector for the matrix-vector product y = Ax.
2393 .  n - This value should be the same as the local size used in creating the
2394        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2395        calculated if N is given). For square matrices n is almost always m.
2396 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2397 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2398 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2399            (same value is used for all local rows)
2400 .  d_nnz - array containing the number of nonzeros in the various rows of the
2401            DIAGONAL portion of the local submatrix (possibly different for each row)
2402            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2403            The size of this array is equal to the number of local rows, i.e. 'm'.
2404            You must leave room for the diagonal entry even if it is zero.
2405 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2406            submatrix (same value is used for all local rows).
2407 -  o_nnz - array containing the number of nonzeros in the various rows of the
2408            OFF-DIAGONAL portion of the local submatrix (possibly different for
2409            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2410            structure. The size of this array is equal to the number
2411            of local rows, i.e. 'm'.
2412 
2413    Output Parameter:
2414 .  A - the matrix
2415 
2416    Notes:
2417    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2418    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2419    storage requirements for this matrix.
2420 
2421    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2422    processor then it must be used on all processors that share the object for
2423    that argument.
2424 
2425    The AIJ format (also called the Yale sparse matrix format or
2426    compressed row storage) is fully compatible with standard Fortran 77
2427    storage.  That is, the stored row and column indices can begin at
2428    either one (as in Fortran) or zero.  See the users manual for details.
2429 
2430    The user MUST specify either the local or global matrix dimensions
2431    (possibly both).
2432 
2433    The parallel matrix is partitioned such that the first m0 rows belong to
2434    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2435    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2436 
2437    The DIAGONAL portion of the local submatrix of a processor can be defined
2438    as the submatrix which is obtained by extracting the part corresponding
2439    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2440    first row that belongs to the processor, and r2 is the last row belonging
2441    to this processor. This is a square mxm matrix. The remaining portion
2442    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2443 
2444    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2445 
2446    When calling this routine with a single process communicator, a matrix of
2447    type MATSEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
2448    type of communicator, use the construction mechanism:
2449      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2450 
2451    By default, this format uses inodes (identical nodes) when possible.
2452    We search for consecutive rows with the same nonzero structure, thereby
2453    reusing matrix information to achieve increased efficiency.
2454 
2455    Options Database Keys:
2456 +  -mat_aij_no_inode  - Do not use inodes
2457 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2458 -  -mat_aij_oneindex - Internally use indexing starting at 1
2459         rather than 0.  Note that when calling MatSetValues(),
2460         the user still MUST index entries starting at 0!
2461 
2462 
2463    Example usage:
2464 
2465    Consider the following 8x8 matrix with 34 nonzero values that is
2466    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2467    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2468    as follows:
2469 
2470 .vb
2471             1  2  0  |  0  3  0  |  0  4
2472     Proc0   0  5  6  |  7  0  0  |  8  0
2473             9  0 10  | 11  0  0  | 12  0
2474     -------------------------------------
2475            13  0 14  | 15 16 17  |  0  0
2476     Proc1   0 18  0  | 19 20 21  |  0  0
2477             0  0  0  | 22 23  0  | 24  0
2478     -------------------------------------
2479     Proc2  25 26 27  |  0  0 28  | 29  0
2480            30  0  0  | 31 32 33  |  0 34
2481 .ve
2482 
2483    This can be represented as a collection of submatrices as:
2484 
2485 .vb
2486       A B C
2487       D E F
2488       G H I
2489 .ve
2490 
2491    Where the submatrices A,B,C are owned by proc0, D,E,F are
2492    owned by proc1, G,H,I are owned by proc2.
2493 
2494    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2495    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2496    The 'M','N' parameters are 8,8, and have the same values on all procs.
2497 
2498    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2499    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2500    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2501    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2502    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2503    matrix, and [DF] as another SeqAIJ matrix.
2504 
2505    When d_nz, o_nz parameters are specified, d_nz storage elements are
2506    allocated for every row of the local diagonal submatrix, and o_nz
2507    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2508    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2509    local row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2510    In this case, the values of d_nz,o_nz are:
2511 .vb
2512      proc0 : dnz = 2, o_nz = 2
2513      proc1 : dnz = 3, o_nz = 2
2514      proc2 : dnz = 1, o_nz = 4
2515 .ve
2516    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2517    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2518    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2519    34 values.
2520 
2521    When d_nnz, o_nnz parameters are specified, the storage is specified
2522    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2523    In the above case the values for d_nnz,o_nnz are:
2524 .vb
2525      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2526      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2527      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2528 .ve
2529    Here the space allocated is the sum of all the above values, i.e. 34, and
2530    hence the preallocation is exact.
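
   A minimal sketch of the corresponding call for proc0 in the example above
   (error checking omitted; comm and A are placeholders):
.vb
     int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve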
2531 
2532    Level: intermediate
2533 
2534 .keywords: matrix, aij, compressed row, sparse, parallel
2535 
2536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2537 @*/
2538 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2539 {
2540   int ierr,size;
2541 
2542   PetscFunctionBegin;
2543   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2544   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2545   if (size > 1) {
2546     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2547     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2548   } else {
2549     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2550     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2551   }
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 #undef __FUNCT__
2556 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2557 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2558 {
2559   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2560   PetscFunctionBegin;
2561   *Ad     = a->A;
2562   *Ao     = a->B;
2563   *colmap = a->garray;
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2569 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2570 {
2571   int        ierr,i;
2572   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2573 
2574   PetscFunctionBegin;
2575   if (coloring->ctype == IS_COLORING_LOCAL) {
2576     ISColoringValue *allcolors,*colors;
2577     ISColoring      ocoloring;
2578 
2579     /* set coloring for diagonal portion */
2580     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2581 
2582     /* set coloring for off-diagonal portion */
2583     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2584     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2585     for (i=0; i<a->B->n; i++) {
2586       colors[i] = allcolors[a->garray[i]];
2587     }
2588     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2589     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2590     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2591     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2592   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2593     ISColoringValue *colors;
2594     int             *larray;
2595     ISColoring      ocoloring;
2596 
2597     /* set coloring for diagonal portion */
2598     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2599     for (i=0; i<a->A->n; i++) {
2600       larray[i] = i + a->cstart;
2601     }
2602     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2603     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2604     for (i=0; i<a->A->n; i++) {
2605       colors[i] = coloring->colors[larray[i]];
2606     }
2607     ierr = PetscFree(larray);CHKERRQ(ierr);
2608     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2609     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2610     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2611 
2612     /* set coloring for off-diagonal portion */
2613     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2614     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2615     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2616     for (i=0; i<a->B->n; i++) {
2617       colors[i] = coloring->colors[larray[i]];
2618     }
2619     ierr = PetscFree(larray);CHKERRQ(ierr);
2620     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2621     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2622     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2623   } else {
2624     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2625   }
2626 
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 #undef __FUNCT__
2631 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2632 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2633 {
2634   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2635   int        ierr;
2636 
2637   PetscFunctionBegin;
2638   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2639   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2640   PetscFunctionReturn(0);
2641 }
2642 
2643 #undef __FUNCT__
2644 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2645 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2646 {
2647   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2648   int        ierr;
2649 
2650   PetscFunctionBegin;
2651   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2652   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 #undef __FUNCT__
2657 #define __FUNCT__ "MatMerge"
2658 /*@C
2659       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2660                  matrices from each processor
2661 
2662     Collective on MPI_Comm
2663 
2664    Input Parameters:
2665 +    comm - the communicator the parallel matrix will live on
2666 -    inmat - the input sequential matrix (one per process)
2667 
2668    Output Parameter:
2669 .    outmat - the parallel matrix generated
2670 
2671     Level: advanced
2672 
2673    Notes: The number of columns of the matrix in EACH of the separate input
2674       matrices MUST be the same.
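
   Example usage (a minimal sketch; error checking omitted, and mlocal, N, seqmat,
   and parmat are placeholders):
.vb
     MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&seqmat);
     /* ... set values in seqmat and assemble it ... */
     MatMerge(comm,seqmat,&parmat);    /* seqmat is destroyed inside MatMerge() */
.ve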
2675 
2676 @*/
2677 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2678 {
2679   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2680   PetscScalar *values;
2681   PetscMap    columnmap,rowmap;
2682 
2683   PetscFunctionBegin;
2684 
2685   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2686 
2687   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2688   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2689   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2690   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2691   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2692   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2693 
2694   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2695   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2696   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2697   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2698   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2699 
2700   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2701   for (i=0;i<m;i++) {
2702     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2703     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2704     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2705   }
2706   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2707   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2708 
2709   for (i=0;i<m;i++) {
2710     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2711     I    = i + rstart;
2712     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2713     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2714   }
2715   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2716   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2717   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2718 
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatFileSplit"
2724 int MatFileSplit(Mat A,char *outfile)
2725 {
2726   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2727   PetscViewer out;
2728   char        *name;
2729   Mat         B;
2730   PetscScalar *values;
2731 
2732   PetscFunctionBegin;
2733 
2734   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2735   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2736   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2737   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2738   for (i=0;i<m;i++) {
2739     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2740     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2741     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2742   }
2743   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2744   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2745 
2746   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2747   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2748   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2749   sprintf(name,"%s.%d",outfile,rank);
2750   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2751   ierr = PetscFree(name);CHKERRQ(ierr);
2752   ierr = MatView(B,out);CHKERRQ(ierr);
2753   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2754   ierr = MatDestroy(B);CHKERRQ(ierr);
2755   PetscFunctionReturn(0);
2756 }
2757