xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fb10cecf1927e102c85c17a39b04f58da85ca3ce)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 /*
8   Local utility routine that creates a mapping from the global column
9 number to the local number in the off-diagonal part of the local
10 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
11 a slightly higher hash-table lookup cost; without it, it is not scalable (each
12 processor holds an order-N integer array) but access is fast.
13 */
14 #undef __FUNCT__
15 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
16 int CreateColmap_MPIAIJ_Private(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
19   int        n = aij->B->n,i,ierr;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
29   PetscLogObjectMemory(mat,mat->N*sizeof(int));
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
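
/*
   Illustrative sketch (not part of the library source): how the colmap built
   above is consulted when a global column index gc must be mapped to a local
   index lc in the off-diagonal block.  The names gc and lc are hypothetical;
   the two forms mirror the lookups in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ().  Entries are stored shifted by one, so a result of
   zero (i.e. lc < 0 after the decrement) means "column not present".

     int lc;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gc+1,&lc);CHKERRQ(ierr);
     lc--;
   #else
     lc = aij->colmap[gc] - 1;
   #endif
*/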
35 
36 #define CHUNKSIZE   15
37 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
38 { \
39  \
40     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
41     rmax = aimax[row]; nrow = ailen[row];  \
42     col1 = col - shift; \
43      \
44     low = 0; high = nrow; \
45     while (high-low > 5) { \
46       t = (low+high)/2; \
47       if (rp[t] > col1) high = t; \
48       else             low  = t; \
49     } \
50       for (_i=low; _i<high; _i++) { \
51         if (rp[_i] > col1) break; \
52         if (rp[_i] == col1) { \
53           if (addv == ADD_VALUES) ap[_i] += value;   \
54           else                  ap[_i] = value; \
55           goto a_noinsert; \
56         } \
57       }  \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (int*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(int));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col1) high = t; \
122       else             low  = t; \
123     } \
124       for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                  ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (nonew == 1) goto b_noinsert; \
133       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
134       if (nrow >= rmax) { \
135         /* there is no extra room in row, therefore enlarge */ \
136         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
137         PetscScalar *new_a; \
138  \
139         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
140  \
141         /* malloc new storage space */ \
142         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
143         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
144         new_j   = (int*)(new_a + new_nz); \
145         new_i   = new_j + new_nz; \
146  \
147         /* copy over old data into new slots */ \
148         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
149         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
150         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
151         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
152         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
153                                                            len*sizeof(int));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
156                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
157         /* free up old matrix storage */ \
158  \
159         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
160         if (!b->singlemalloc) { \
161           ierr = PetscFree(b->i);CHKERRQ(ierr); \
162           ierr = PetscFree(b->j);CHKERRQ(ierr); \
163         } \
164         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
165         b->singlemalloc = PETSC_TRUE; \
166  \
167         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
168         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
169         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
170         b->maxnz += CHUNKSIZE; \
171         b->reallocs++; \
172       } \
173       N = nrow++ - 1; b->nz++; \
174       /* shift up all the later entries in this row */ \
175       for (ii=N; ii>=_i; ii--) { \
176         rp[ii+1] = rp[ii]; \
177         ap[ii+1] = ap[ii]; \
178       } \
179       rp[_i] = col1;  \
180       ap[_i] = value;  \
181       b_noinsert: ; \
182       bilen[row] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIAIJ"
187 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
190   PetscScalar  value;
191   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236             col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
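
/*
   Illustrative sketch (not part of the library source): a typical call
   sequence that exercises MatSetValues_MPIAIJ() above.  Locally owned rows go
   directly into the diagonal (A) or off-diagonal (B) block; rows owned by
   another process are stashed and communicated during assembly.  The
   communicator, sizes, and values here are hypothetical.

     Mat         X;
     int         row = 0,col = 3,ierr;
     PetscScalar one = 1.0;

     ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,10,10,
                            0,PETSC_NULL,0,PETSC_NULL,&X);CHKERRQ(ierr);
     ierr = MatSetValues(X,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(X,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/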
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
277   int        cstart = aij->cstart,cend = aij->cend,row,col;
278 
279   PetscFunctionBegin;
280   for (i=0; i<m; i++) {
281     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
282     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
283     if (idxm[i] >= rstart && idxm[i] < rend) {
284       row = idxm[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
287         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
288         if (idxn[j] >= cstart && idxn[j] < cend){
289           col = idxn[j] - cstart;
290           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
291         } else {
292           if (!aij->colmap) {
293             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
294           }
295 #if defined (PETSC_USE_CTABLE)
296           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
297           col--;
298 #else
299           col = aij->colmap[idxn[j]] - 1;
300 #endif
301           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
302           else {
303             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
304           }
305         }
306       }
307     } else {
308       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
309     }
310   }
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
316 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
317 {
318   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
319   int         ierr,nstash,reallocs;
320   InsertMode  addv;
321 
322   PetscFunctionBegin;
323   if (aij->donotstash) {
324     PetscFunctionReturn(0);
325   }
326 
327   /* make sure all processes use the same InsertMode: all INSERT_VALUES or all ADD_VALUES */
328   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
329   if (addv == (ADD_VALUES|INSERT_VALUES)) {
330     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
331   }
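  /* MPI_BOR detects a mismatch because the InsertMode values act as bit
     flags: if every process passed the same mode the OR equals that mode,
     while a mix of ADD_VALUES and INSERT_VALUES yields the composite value
     tested above. */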
332   mat->insertmode = addv; /* in case this processor had no cache */
333 
334   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
335   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
336   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
337   PetscFunctionReturn(0);
338 }
339 
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
343 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
344 {
345   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
346   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
347   int         i,j,rstart,ncols,n,ierr,flg;
348   int         *row,*col,other_disassembled;
349   PetscScalar *val;
350   InsertMode  addv = mat->insertmode;
351 
352   PetscFunctionBegin;
353   if (!aij->donotstash) {
354     while (1) {
355       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
356       if (!flg) break;
357 
358       for (i=0; i<n;) {
359         /* Now identify the consecutive vals belonging to the same row */
360         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
361         if (j < n) ncols = j-i;
362         else       ncols = n-i;
363         /* Now assemble all these values with a single function call */
364         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
365         i = j;
366       }
367     }
368     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
369   }
370 
371   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
372   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
373 
374   /* determine if any processor has disassembled, if so we must
375      also disassemble ourselves, in order that we may reassemble. */
376   /*
377      if nonzero structure of submatrix B cannot change then we know that
378      no processor disassembled thus we can skip this stuff
379   */
380   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
381     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
382     if (mat->was_assembled && !other_disassembled) {
383       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
384     }
385   }
386 
387   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
388     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
389   }
390   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
391   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
392 
393   if (aij->rowvalues) {
394     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
395     aij->rowvalues = 0;
396   }
397 
398   /* used by MatAXPY() */
399   a->xtoy = 0; b->xtoy = 0;
400   a->XtoY = 0; b->XtoY = 0;
401 
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
407 int MatZeroEntries_MPIAIJ(Mat A)
408 {
409   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
410   int        ierr;
411 
412   PetscFunctionBegin;
413   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
414   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroRows_MPIAIJ"
420 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
421 {
422   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
423   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
424   int            *nprocs,j,idx,nsends,row;
425   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
426   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
427   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
428   MPI_Comm       comm = A->comm;
429   MPI_Request    *send_waits,*recv_waits;
430   MPI_Status     recv_status,*send_status;
431   IS             istmp;
432   PetscTruth     found;
433 
434   PetscFunctionBegin;
435   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
436   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
437 
438   /*  first count number of contributors to each processor */
439   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
440   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
441   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
442   for (i=0; i<N; i++) {
443     idx = rows[i];
444     found = PETSC_FALSE;
445     for (j=0; j<size; j++) {
446       if (idx >= owners[j] && idx < owners[j+1]) {
447         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
448       }
449     }
450     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
451   }
452   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
453 
454   /* inform other processors of number of messages and max length*/
455   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
456 
457   /* post receives:   */
458   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
459   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
460   for (i=0; i<nrecvs; i++) {
461     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
462   }
463 
464   /* do sends:
465       1) starts[i] gives the starting index in svalues for stuff going to
466          the ith processor
467   */
468   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
469   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
470   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
471   starts[0] = 0;
472   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
473   for (i=0; i<N; i++) {
474     svalues[starts[owner[i]]++] = rows[i];
475   }
476   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
477 
478   starts[0] = 0;
479   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
480   count = 0;
481   for (i=0; i<size; i++) {
482     if (nprocs[2*i+1]) {
483       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
484     }
485   }
486   ierr = PetscFree(starts);CHKERRQ(ierr);
487 
488   base = owners[rank];
489 
490   /*  wait on receives */
491   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
492   source = lens + nrecvs;
493   count  = nrecvs; slen = 0;
494   while (count) {
495     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
496     /* unpack receives into our local space */
497     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
498     source[imdex]  = recv_status.MPI_SOURCE;
499     lens[imdex]    = n;
500     slen          += n;
501     count--;
502   }
503   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
504 
505   /* move the data into the send scatter */
506   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
507   count = 0;
508   for (i=0; i<nrecvs; i++) {
509     values = rvalues + i*nmax;
510     for (j=0; j<lens[i]; j++) {
511       lrows[count++] = values[j] - base;
512     }
513   }
514   ierr = PetscFree(rvalues);CHKERRQ(ierr);
515   ierr = PetscFree(lens);CHKERRQ(ierr);
516   ierr = PetscFree(owner);CHKERRQ(ierr);
517   ierr = PetscFree(nprocs);CHKERRQ(ierr);
518 
519   /* actually zap the local rows */
520   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
521   PetscLogObjectParent(A,istmp);
522 
523   /*
524         Zero the required rows. If the "diagonal block" of the matrix
525      is square and the user wishes to set the diagonal, we use separate
526      code so that MatSetValues() is not called for each diagonal entry,
527      which would repeatedly allocate new memory and slow things down.
528 
529        Contributed by: Matthew Knepley
530   */
531   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
532   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
533   if (diag && (l->A->M == l->A->N)) {
534     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
535   } else if (diag) {
536     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
537     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
538       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
539 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
540     }
541     for (i = 0; i < slen; i++) {
542       row  = lrows[i] + rstart;
543       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
544     }
545     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
547   } else {
548     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
549   }
550   ierr = ISDestroy(istmp);CHKERRQ(ierr);
551   ierr = PetscFree(lrows);CHKERRQ(ierr);
552 
553   /* wait on sends */
554   if (nsends) {
555     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
556     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
557     ierr = PetscFree(send_status);CHKERRQ(ierr);
558   }
559   ierr = PetscFree(send_waits);CHKERRQ(ierr);
560   ierr = PetscFree(svalues);CHKERRQ(ierr);
561 
562   PetscFunctionReturn(0);
563 }
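
/*
   Illustrative sketch (not part of the library source): zeroing a set of rows
   while keeping a unit diagonal, the path handled by MatZeroRows_MPIAIJ()
   above.  Rows are given by GLOBAL index; the routine routes each row to its
   owner.  The matrix X and the row list are hypothetical.

     IS          zrows;
     int         rows[2] = {0,3},ierr;
     PetscScalar diag = 1.0;

     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&zrows);CHKERRQ(ierr);
     ierr = MatZeroRows(X,zrows,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(zrows);CHKERRQ(ierr);
*/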
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatMult_MPIAIJ"
567 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
568 {
569   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
570   int        ierr,nt;
571 
572   PetscFunctionBegin;
573   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
574   if (nt != A->n) {
575     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
576   }
577   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
578   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
579   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
580   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
581   PetscFunctionReturn(0);
582 }
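
/*
   The multiply above is the standard two-block decomposition: with x_loc the
   locally owned part of xx and x_ghost the off-process entries gathered into
   a->lvec,

       yy = A*x_loc + B*x_ghost

   where A is the diagonal block and B the off-diagonal block.  The scatter
   is started before the local product so communication overlaps computation.
*/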
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatMultAdd_MPIAIJ"
586 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
587 {
588   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
589   int        ierr;
590 
591   PetscFunctionBegin;
592   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
594   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
595   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
601 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
602 {
603   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
604   int        ierr;
605 
606   PetscFunctionBegin;
607   /* do nondiagonal part */
608   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
609   /* send it on its way */
610   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
611   /* do local part */
612   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
613   /* receive remote parts: note this assumes the values are not actually */
614   /* inserted into yy until the VecScatterEnd() below, which holds for this */
615   /* implementation but may not be true for every implementation. */
616   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
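
/*
   The transpose product is assembled the mirror way: each process forms
   B^T*x_loc (contributions to rows owned elsewhere), scatter-adds those into
   their owning processes, and adds its own A^T*x_loc, i.e.

       yy = A^T*x_loc + (scatter-add of the B^T*x_loc pieces).
*/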
619 
620 EXTERN_C_BEGIN
621 #undef __FUNCT__
622 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
623 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
624 {
625   MPI_Comm comm;
626   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
627   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
628   IS         Me,Notme;
629   int        M,N,first,last,*notme,ntids,i, ierr;
630 
631   PetscFunctionBegin;
632 
633   /* Easy test: symmetric diagonal block */
634   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
635   ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
636   if (!*f) PetscFunctionReturn(0);
637   ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr);
638   ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr);
639   if (ntids==1) PetscFunctionReturn(0);
640 
641   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
642   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
643   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
644   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
645   for (i=0; i<first; i++) notme[i] = i;
646   for (i=last; i<M; i++) notme[i-last+first] = i;
647   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
648   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
649   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
650   Aoff = Aoffs[0];
651   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
652   Boff = Boffs[0];
653   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
654   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
655   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
656   ierr = ISDestroy(Me); CHKERRQ(ierr);
657   ierr = ISDestroy(Notme); CHKERRQ(ierr);
658 
659   PetscFunctionReturn(0);
660 }
661 EXTERN_C_END
662 
663 #undef __FUNCT__
664 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
665 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
666 {
667   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
668   int        ierr;
669 
670   PetscFunctionBegin;
671   /* do nondiagonal part */
672   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
673   /* send it on its way */
674   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
675   /* do local part */
676   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
677   /* receive remote parts: note this assumes the values are not actually */
678   /* inserted into zz until the VecScatterEnd() below, which holds for this */
679   /* implementation but may not be true for every implementation. */
680   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 /*
685   This only works correctly for square matrices where the subblock A->A is the
686    diagonal block
687 */
688 #undef __FUNCT__
689 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
690 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
691 {
692   int        ierr;
693   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
694 
695   PetscFunctionBegin;
696   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
697   if (a->rstart != a->cstart || a->rend != a->cend) {
698     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
699   }
700   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatScale_MPIAIJ"
706 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
707 {
708   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
709   int        ierr;
710 
711   PetscFunctionBegin;
712   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
713   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
714   PetscFunctionReturn(0);
715 }
716 
717 #undef __FUNCT__
718 #define __FUNCT__ "MatDestroy_MPIAIJ"
719 int MatDestroy_MPIAIJ(Mat mat)
720 {
721   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
722   int        ierr;
723 
724   PetscFunctionBegin;
725 #if defined(PETSC_USE_LOG)
726   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
727 #endif
728   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
729   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
730   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
731   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
732 #if defined (PETSC_USE_CTABLE)
733   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
734 #else
735   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
736 #endif
737   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
738   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
739   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
740   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
741   ierr = PetscFree(aij);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatView_MPIAIJ_Binary"
747 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
748 {
749   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
750   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
751   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
752   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
753   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
754   PetscScalar       *column_values;
755 
756   PetscFunctionBegin;
757   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
758   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
759   nz   = A->nz + B->nz;
760   if (rank == 0) {
761     header[0] = MAT_FILE_COOKIE;
762     header[1] = mat->M;
763     header[2] = mat->N;
764     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
765     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
766     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
767     /* get largest number of rows any processor has */
768     rlen = mat->m;
769     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
770     for (i=1; i<size; i++) {
771       rlen = PetscMax(rlen,range[i+1] - range[i]);
772     }
773   } else {
774     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
775     rlen = mat->m;
776   }
777 
778   /* load up the local row counts */
779   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
780   for (i=0; i<mat->m; i++) {
781     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
782   }
783 
784   /* store the row lengths to the file */
785   if (rank == 0) {
786     MPI_Status status;
787     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
788     for (i=1; i<size; i++) {
789       rlen = range[i+1] - range[i];
790       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
791       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
792     }
793   } else {
794     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
795   }
796   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
797 
798   /* load up the local column indices */
799   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
800   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
801   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
802   cnt  = 0;
803   for (i=0; i<mat->m; i++) {
804     for (j=B->i[i]; j<B->i[i+1]; j++) {
805       if ( (col = garray[B->j[j]]) > cstart) break;
806       column_indices[cnt++] = col;
807     }
808     for (k=A->i[i]; k<A->i[i+1]; k++) {
809       column_indices[cnt++] = A->j[k] + cstart;
810     }
811     for (; j<B->i[i+1]; j++) {
812       column_indices[cnt++] = garray[B->j[j]];
813     }
814   }
815   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
816 
817   /* store the column indices to the file */
818   if (rank == 0) {
819     MPI_Status status;
820     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
821     for (i=1; i<size; i++) {
822       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
824       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
825       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
826     }
827   } else {
828     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
829     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
830   }
831   ierr = PetscFree(column_indices);CHKERRQ(ierr);
832 
833   /* load up the local column values */
834   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
835   cnt  = 0;
836   for (i=0; i<mat->m; i++) {
837     for (j=B->i[i]; j<B->i[i+1]; j++) {
838       if ( garray[B->j[j]] > cstart) break;
839       column_values[cnt++] = B->a[j];
840     }
841     for (k=A->i[i]; k<A->i[i+1]; k++) {
842       column_values[cnt++] = A->a[k];
843     }
844     for (; j<B->i[i+1]; j++) {
845       column_values[cnt++] = B->a[j];
846     }
847   }
848   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
849 
850   /* store the column values to the file */
851   if (rank == 0) {
852     MPI_Status status;
853     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
854     for (i=1; i<size; i++) {
855       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
857       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
858       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
859     }
860   } else {
861     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
862     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
863   }
864   ierr = PetscFree(column_values);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
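
/*
   Layout of the binary file produced above (rank 0 performs all the I/O):

     header[0..3]    MAT_FILE_COOKIE, M (global rows), N (global cols), total nz
     row lengths     M ints, in global row order
     column indices  total-nz ints, row by row, each row's columns ascending
     values          total-nz PetscScalars, matching the column indices

   Off-process pieces reach rank 0 through plain MPI_Send/MPI_Recv, one
   process at a time, so rank 0 only ever holds buffers of the largest local
   size (nzmax) rather than the whole matrix.
*/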
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
870 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
871 {
872   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
873   int               ierr,rank = aij->rank,size = aij->size;
874   PetscTruth        isdraw,isascii,flg,isbinary;
875   PetscViewer       sviewer;
876   PetscViewerFormat format;
877 
878   PetscFunctionBegin;
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
881   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
882   if (isascii) {
883     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
884     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885       MatInfo info;
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
889       if (flg) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
891                                                   rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
894                                                   rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscFunctionReturn(0);
905     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
906       PetscFunctionReturn(0);
907     }
908   } else if (isbinary) {
909     if (size == 1) {
910       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
911       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
912     } else {
913       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
914     }
915     PetscFunctionReturn(0);
916   } else if (isdraw) {
917     PetscDraw  draw;
918     PetscTruth isnull;
919     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
920     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
921   }
922 
923   if (size == 1) {
924     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
925     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
926   } else {
927     /* assemble the entire matrix onto first processor. */
928     Mat         A;
929     Mat_SeqAIJ *Aloc;
930     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
931     PetscScalar *a;
932 
933     if (!rank) {
934       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
935     } else {
936       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
937     }
938     PetscLogObjectParent(mat,A);
939 
940     /* copy over the A part */
941     Aloc = (Mat_SeqAIJ*)aij->A->data;
942     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943     row = aij->rstart;
944     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
945     for (i=0; i<m; i++) {
946       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
947       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
948     }
949     aj = Aloc->j;
950     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
951 
952     /* copy over the B part */
953     Aloc = (Mat_SeqAIJ*)aij->B->data;
954     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955     row  = aij->rstart;
956     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
957     ct   = cols;
958     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
959     for (i=0; i<m; i++) {
960       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
961       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
962     }
963     ierr = PetscFree(ct);CHKERRQ(ierr);
964     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
965     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
966     /*
967        Everyone has to call to draw the matrix since the graphics waits are
968        synchronized across all processors that share the PetscDraw object
969     */
970     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
971     if (!rank) {
972       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
973       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
974     }
975     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
976     ierr = MatDestroy(A);CHKERRQ(ierr);
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatView_MPIAIJ"
983 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
984 {
985   int        ierr;
986   PetscTruth isascii,isdraw,issocket,isbinary;
987 
988   PetscFunctionBegin;
989   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
990   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
991   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
992   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
993   if (isascii || isdraw || isbinary || issocket) {
994     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
995   } else {
996     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatRelax_MPIAIJ"
1005 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1006 {
1007   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1008   int          ierr;
1009   Vec          bb1;
1010   PetscScalar  mone=-1.0;
1011 
1012   PetscFunctionBegin;
1013   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1014 
1015   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1016 
1017   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1018     if (flag & SOR_ZERO_INITIAL_GUESS) {
1019       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1020       its--;
1021     }
1022 
1023     while (its--) {
1024       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1025       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1026 
1027       /* update rhs: bb1 = bb - B*x */
1028       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1029       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1030 
1031       /* local sweep */
1032       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1033       CHKERRQ(ierr);
1034     }
1035   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1036     if (flag & SOR_ZERO_INITIAL_GUESS) {
1037       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1038       its--;
1039     }
1040     while (its--) {
1041       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1042       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1043 
1044       /* update rhs: bb1 = bb - B*x */
1045       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1046       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1047 
1048       /* local sweep */
1049       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1050       CHKERRQ(ierr);
1051     }
1052   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1053     if (flag & SOR_ZERO_INITIAL_GUESS) {
1054       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1055       its--;
1056     }
1057     while (its--) {
1058       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1059       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1060 
1061       /* update rhs: bb1 = bb - B*x */
1062       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1063       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1064 
1065       /* local sweep */
1066       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1067       CHKERRQ(ierr);
1068     }
1069   } else {
1070     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1071   }
1072 
1073   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
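
/*
   Each "local" sweep above is block Jacobi across processes with SOR applied
   inside each diagonal block: the ghost values of the current iterate are
   gathered into mat->lvec, the right-hand side is corrected to remove the
   off-process coupling,

       bb1 = bb - B*x_ghost,

   and the sequential relaxation is then run on the diagonal block A with bb1.
   A true parallel SOR ordering across processes is not supported.
*/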
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1079 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1080 {
1081   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1082   Mat        A = mat->A,B = mat->B;
1083   int        ierr;
1084   PetscReal  isend[5],irecv[5];
1085 
1086   PetscFunctionBegin;
1087   info->block_size     = 1.0;
1088   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1089   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1090   isend[3] = info->memory;  isend[4] = info->mallocs;
1091   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1092   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1093   isend[3] += info->memory;  isend[4] += info->mallocs;
1094   if (flag == MAT_LOCAL) {
1095     info->nz_used      = isend[0];
1096     info->nz_allocated = isend[1];
1097     info->nz_unneeded  = isend[2];
1098     info->memory       = isend[3];
1099     info->mallocs      = isend[4];
1100   } else if (flag == MAT_GLOBAL_MAX) {
1101     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1102     info->nz_used      = irecv[0];
1103     info->nz_allocated = irecv[1];
1104     info->nz_unneeded  = irecv[2];
1105     info->memory       = irecv[3];
1106     info->mallocs      = irecv[4];
1107   } else if (flag == MAT_GLOBAL_SUM) {
1108     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1109     info->nz_used      = irecv[0];
1110     info->nz_allocated = irecv[1];
1111     info->nz_unneeded  = irecv[2];
1112     info->memory       = irecv[3];
1113     info->mallocs      = irecv[4];
1114   }
1115   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1116   info->fill_ratio_needed = 0;
1117   info->factor_mallocs    = 0;
1118   info->rows_global       = (double)matin->M;
1119   info->columns_global    = (double)matin->N;
1120   info->rows_local        = (double)matin->m;
1121   info->columns_local     = (double)matin->n;
1122 
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatSetOption_MPIAIJ"
1128 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1129 {
1130   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1131   int        ierr;
1132 
1133   PetscFunctionBegin;
1134   switch (op) {
1135   case MAT_NO_NEW_NONZERO_LOCATIONS:
1136   case MAT_YES_NEW_NONZERO_LOCATIONS:
1137   case MAT_COLUMNS_UNSORTED:
1138   case MAT_COLUMNS_SORTED:
1139   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1140   case MAT_KEEP_ZEROED_ROWS:
1141   case MAT_NEW_NONZERO_LOCATION_ERR:
1142   case MAT_USE_INODES:
1143   case MAT_DO_NOT_USE_INODES:
1144   case MAT_IGNORE_ZERO_ENTRIES:
1145     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1146     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1147     break;
1148   case MAT_ROW_ORIENTED:
1149     a->roworiented = PETSC_TRUE;
1150     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1151     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1152     break;
1153   case MAT_ROWS_SORTED:
1154   case MAT_ROWS_UNSORTED:
1155   case MAT_YES_NEW_DIAGONALS:
1156     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1157     break;
1158   case MAT_COLUMN_ORIENTED:
1159     a->roworiented = PETSC_FALSE;
1160     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1161     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1162     break;
1163   case MAT_IGNORE_OFF_PROC_ENTRIES:
1164     a->donotstash = PETSC_TRUE;
1165     break;
1166   case MAT_NO_NEW_DIAGONALS:
1167     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1168   case MAT_SYMMETRIC:
1169   case MAT_STRUCTURALLY_SYMMETRIC:
1170     break;
1171   default:
1172     SETERRQ(PETSC_ERR_SUP,"unknown option");
1173   }
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatGetRow_MPIAIJ"
1179 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1180 {
1181   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1182   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1183   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1184   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1185   int          *cmap,*idx_p;
1186 
1187   PetscFunctionBegin;
1188   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1189   mat->getrowactive = PETSC_TRUE;
1190 
1191   if (!mat->rowvalues && (idx || v)) {
1192     /*
1193         allocate enough space to hold information from the longest row.
1194     */
1195     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1196     int     max = 1,tmp;
1197     for (i=0; i<matin->m; i++) {
1198       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1199       if (max < tmp) { max = tmp; }
1200     }
1201     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1202     mat->rowindices = (int*)(mat->rowvalues + max);
1203   }
1204 
1205   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1206   lrow = row - rstart;
1207 
1208   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1209   if (!v)   {pvA = 0; pvB = 0;}
1210   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1211   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1212   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1213   nztot = nzA + nzB;
1214 
1215   cmap  = mat->garray;
1216   if (v  || idx) {
1217     if (nztot) {
1218       /* Sort by increasing column numbers, assuming A and B already sorted */
1219       int imark = -1;
1220       if (v) {
1221         *v = v_p = mat->rowvalues;
1222         for (i=0; i<nzB; i++) {
1223           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1224           else break;
1225         }
1226         imark = i;
1227         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1228         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1229       }
1230       if (idx) {
1231         *idx = idx_p = mat->rowindices;
1232         if (imark > -1) {
1233           for (i=0; i<imark; i++) {
1234             idx_p[i] = cmap[cworkB[i]];
1235           }
1236         } else {
1237           for (i=0; i<nzB; i++) {
1238             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1239             else break;
1240           }
1241           imark = i;
1242         }
1243         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1244         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1245       }
1246     } else {
1247       if (idx) *idx = 0;
1248       if (v)   *v   = 0;
1249     }
1250   }
1251   *nz = nztot;
1252   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1253   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1259 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1260 {
1261   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1262 
1263   PetscFunctionBegin;
1264   if (aij->getrowactive == PETSC_FALSE) {
1265     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1266   }
1267   aij->getrowactive = PETSC_FALSE;
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatNorm_MPIAIJ"
1273 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1274 {
1275   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1276   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1277   int          ierr,i,j,cstart = aij->cstart;
1278   PetscReal    sum = 0.0;
1279   PetscScalar  *v;
1280 
1281   PetscFunctionBegin;
1282   if (aij->size == 1) {
1283     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1284   } else {
1285     if (type == NORM_FROBENIUS) {
1286       v = amat->a;
1287       for (i=0; i<amat->nz; i++) {
1288 #if defined(PETSC_USE_COMPLEX)
1289         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1290 #else
1291         sum += (*v)*(*v); v++;
1292 #endif
1293       }
1294       v = bmat->a;
1295       for (i=0; i<bmat->nz; i++) {
1296 #if defined(PETSC_USE_COMPLEX)
1297         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1298 #else
1299         sum += (*v)*(*v); v++;
1300 #endif
1301       }
1302       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1303       *norm = sqrt(*norm);
1304     } else if (type == NORM_1) { /* max column norm */
1305       PetscReal *tmp,*tmp2;
1306       int    *jj,*garray = aij->garray;
1307       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1308       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1309       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1310       *norm = 0.0;
1311       v = amat->a; jj = amat->j;
1312       for (j=0; j<amat->nz; j++) {
1313         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1314       }
1315       v = bmat->a; jj = bmat->j;
1316       for (j=0; j<bmat->nz; j++) {
1317         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1318       }
1319       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1320       for (j=0; j<mat->N; j++) {
1321         if (tmp2[j] > *norm) *norm = tmp2[j];
1322       }
1323       ierr = PetscFree(tmp);CHKERRQ(ierr);
1324       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1325     } else if (type == NORM_INFINITY) { /* max row norm */
1326       PetscReal ntemp = 0.0;
1327       for (j=0; j<aij->A->m; j++) {
1328         v = amat->a + amat->i[j];
1329         sum = 0.0;
1330         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1331           sum += PetscAbsScalar(*v); v++;
1332         }
1333         v = bmat->a + bmat->i[j];
1334         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1335           sum += PetscAbsScalar(*v); v++;
1336         }
1337         if (sum > ntemp) ntemp = sum;
1338       }
1339       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1340     } else {
1341       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1342     }
1343   }
1344   PetscFunctionReturn(0);
1345 }
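
/*
   The norms computed above, with a_ij ranging over entries of both the
   diagonal and off-diagonal blocks:

     NORM_FROBENIUS   sqrt(sum_ij |a_ij|^2)   local sums combined with MPI_SUM
     NORM_1           max_j sum_i |a_ij|      column sums summed, then maximized
     NORM_INFINITY    max_i sum_j |a_ij|      local row maxima combined with MPI_MAX

   The two norm is not supported since it would require a parallel singular
   value computation.
*/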
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatTranspose_MPIAIJ"
1349 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1350 {
1351   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1352   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1353   int          ierr;
1354   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1355   Mat          B;
1356   PetscScalar  *array;
1357 
1358   PetscFunctionBegin;
1359   if (!matout && M != N) {
1360     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1361   }
1362 
1363   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1364 
1365   /* copy over the A part */
1366   Aloc = (Mat_SeqAIJ*)a->A->data;
1367   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1368   row = a->rstart;
1369   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1370   for (i=0; i<m; i++) {
1371     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1372     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1373   }
1374   aj = Aloc->j;
1375   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1376 
1377   /* copy over the B part */
1378   Aloc = (Mat_SeqAIJ*)a->B->data;
1379   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1380   row  = a->rstart;
1381   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1382   ct   = cols;
1383   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1384   for (i=0; i<m; i++) {
1385     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1386     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1387   }
1388   ierr = PetscFree(ct);CHKERRQ(ierr);
1389   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391   if (matout) {
1392     *matout = B;
1393   } else {
1394     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1395   }
1396   PetscFunctionReturn(0);
1397 }
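
/*
   Illustrative usage sketch, not part of the library source: MatTranspose()
   dispatches to the routine above for MPIAIJ matrices. Passing PETSC_NULL for
   the result requests the in-place variant, which (as checked above) is
   supported only for square matrices.
*/
#undef __FUNCT__
#define __FUNCT__ "MatTransposeUsage_Example"
static int MatTransposeUsage_Example(Mat A)
{
  int ierr;
  Mat At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,&At);CHKERRQ(ierr);        /* out-of-place: builds a new matrix At */
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr); /* in-place: replaces A with its transpose */
  PetscFunctionReturn(0);
}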
1398 
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1401 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1402 {
1403   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1404   Mat        a = aij->A,b = aij->B;
1405   int        ierr,s1,s2,s3;
1406 
1407   PetscFunctionBegin;
1408   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1409   if (rr) {
1410     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1411     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1412     /* Overlap communication with computation. */
1413     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1414   }
1415   if (ll) {
1416     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1417     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1418     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1419   }
1420   /* scale the diagonal block */
1421   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1422 
1423   if (rr) {
1424     /* Do a scatter end and then right scale the off-diagonal block */
1425     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1426     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1427   }
1428 
1429   PetscFunctionReturn(0);
1430 }
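
/*
   Illustrative usage sketch, not part of the library source: MatDiagonalScale()
   forms diag(ll)*mat*diag(rr); ll must conform to the local row layout and rr
   to the local column layout, as the size checks above enforce. The vectors
   built here are hypothetical placeholders (this assumes the era's
   VecSet(PetscScalar*,Vec) calling sequence).
*/
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleUsage_Example"
static int MatDiagonalScaleUsage_Example(Mat mat)
{
  int         ierr,m,n;
  Vec         ll,rr;
  PetscScalar two = 2.0;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  ierr = VecCreateMPI(mat->comm,m,PETSC_DETERMINE,&ll);CHKERRQ(ierr); /* conforms to rows */
  ierr = VecCreateMPI(mat->comm,n,PETSC_DETERMINE,&rr);CHKERRQ(ierr); /* conforms to columns */
  ierr = VecSet(&two,ll);CHKERRQ(ierr);
  ierr = VecSet(&two,rr);CHKERRQ(ierr);
  ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr); /* scales each entry a_ij by ll_i*rr_j */
  ierr = VecDestroy(ll);CHKERRQ(ierr);
  ierr = VecDestroy(rr);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}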
1431 
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1435 int MatPrintHelp_MPIAIJ(Mat A)
1436 {
1437   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1438   int        ierr;
1439 
1440   PetscFunctionBegin;
1441   if (!a->rank) {
1442     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1443   }
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1449 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1450 {
1451   PetscFunctionBegin;
1452   *bs = 1;
1453   PetscFunctionReturn(0);
1454 }
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1457 int MatSetUnfactored_MPIAIJ(Mat A)
1458 {
1459   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1460   int        ierr;
1461 
1462   PetscFunctionBegin;
1463   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatEqual_MPIAIJ"
1469 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1470 {
1471   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1472   Mat        a,b,c,d;
1473   PetscTruth flg;
1474   int        ierr;
1475 
1476   PetscFunctionBegin;
1477   a = matA->A; b = matA->B;
1478   c = matB->A; d = matB->B;
1479 
1480   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1481   if (flg == PETSC_TRUE) {
1482     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1483   }
1484   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatCopy_MPIAIJ"
1490 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1491 {
1492   int        ierr;
1493   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1494   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1495 
1496   PetscFunctionBegin;
1497   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1498   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1499     /* because of the column compression in the off-processor part of the matrix a->B,
1500        the number of columns in a->B and b->B may be different, hence we cannot call
1501        MatCopy() directly on the two parts. If need be, we could provide a copy more
1502        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1503        then copying the submatrices */
1504     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1505   } else {
1506     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1507     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1508   }
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1514 int MatSetUpPreallocation_MPIAIJ(Mat A)
1515 {
1516   int        ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 #include "petscblaslapack.h"
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatAXPY_MPIAIJ"
1526 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1527 {
1528   int        ierr,one=1,i;
1529   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1530   Mat_SeqAIJ *x,*y;
1531 
1532   PetscFunctionBegin;
1533   if (str == SAME_NONZERO_PATTERN) {
1534     x = (Mat_SeqAIJ *)xx->A->data;
1535     y = (Mat_SeqAIJ *)yy->A->data;
1536     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1537     x = (Mat_SeqAIJ *)xx->B->data;
1538     y = (Mat_SeqAIJ *)yy->B->data;
1539     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1540   } else if (str == SUBSET_NONZERO_PATTERN) {
1541     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1542 
1543     x = (Mat_SeqAIJ *)xx->B->data;
1544     y = (Mat_SeqAIJ *)yy->B->data;
1545     if (y->xtoy && y->XtoY != xx->B) {
1546       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1547       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1548     }
1549     if (!y->xtoy) { /* get xtoy */
1550       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1551       y->XtoY = xx->B;
1552     }
1553     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1554   } else {
1555     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1556   }
1557   PetscFunctionReturn(0);
1558 }
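
/*
   Illustrative usage sketch, not part of the library source: MatAXPY() computes
   Y = Y + a*X; promising SAME_NONZERO_PATTERN lets the routine above take the
   direct BLAS axpy path over the stored values of both blocks.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAXPYUsage_Example"
static int MatAXPYUsage_Example(Mat X,Mat Y)
{
  int         ierr;
  PetscScalar alpha = 2.0;

  PetscFunctionBegin;
  ierr = MatAXPY(&alpha,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}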
1559 
1560 /* -------------------------------------------------------------------*/
1561 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1562        MatGetRow_MPIAIJ,
1563        MatRestoreRow_MPIAIJ,
1564        MatMult_MPIAIJ,
1565 /* 4*/ MatMultAdd_MPIAIJ,
1566        MatMultTranspose_MPIAIJ,
1567        MatMultTransposeAdd_MPIAIJ,
1568        0,
1569        0,
1570        0,
1571 /*10*/ 0,
1572        0,
1573        0,
1574        MatRelax_MPIAIJ,
1575        MatTranspose_MPIAIJ,
1576 /*15*/ MatGetInfo_MPIAIJ,
1577        MatEqual_MPIAIJ,
1578        MatGetDiagonal_MPIAIJ,
1579        MatDiagonalScale_MPIAIJ,
1580        MatNorm_MPIAIJ,
1581 /*20*/ MatAssemblyBegin_MPIAIJ,
1582        MatAssemblyEnd_MPIAIJ,
1583        0,
1584        MatSetOption_MPIAIJ,
1585        MatZeroEntries_MPIAIJ,
1586 /*25*/ MatZeroRows_MPIAIJ,
1587 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1588        MatLUFactorSymbolic_MPIAIJ_TFS,
1589 #else
1590        0,
1591 #endif
1592        0,
1593        0,
1594        0,
1595 /*30*/ MatSetUpPreallocation_MPIAIJ,
1596        0,
1597        0,
1598        0,
1599        0,
1600 /*35*/ MatDuplicate_MPIAIJ,
1601        0,
1602        0,
1603        0,
1604        0,
1605 /*40*/ MatAXPY_MPIAIJ,
1606        MatGetSubMatrices_MPIAIJ,
1607        MatIncreaseOverlap_MPIAIJ,
1608        MatGetValues_MPIAIJ,
1609        MatCopy_MPIAIJ,
1610 /*45*/ MatPrintHelp_MPIAIJ,
1611        MatScale_MPIAIJ,
1612        0,
1613        0,
1614        0,
1615 /*50*/ MatGetBlockSize_MPIAIJ,
1616        0,
1617        0,
1618        0,
1619        0,
1620 /*55*/ MatFDColoringCreate_MPIAIJ,
1621        0,
1622        MatSetUnfactored_MPIAIJ,
1623        0,
1624        0,
1625 /*60*/ MatGetSubMatrix_MPIAIJ,
1626        MatDestroy_MPIAIJ,
1627        MatView_MPIAIJ,
1628        MatGetPetscMaps_Petsc,
1629        0,
1630 /*65*/ 0,
1631        0,
1632        0,
1633        0,
1634        0,
1635 /*70*/ 0,
1636        0,
1637        MatSetColoring_MPIAIJ,
1638        MatSetValuesAdic_MPIAIJ,
1639        MatSetValuesAdifor_MPIAIJ,
1640 /*75*/ 0,
1641        0,
1642        0,
1643        0,
1644        0,
1645 /*80*/ 0,
1646        0,
1647        0,
1648        0,
1649        0,
1650 /*85*/ MatLoad_MPIAIJ};
1651 
1652 /* ----------------------------------------------------------------------------------------*/
1653 
1654 EXTERN_C_BEGIN
1655 #undef __FUNCT__
1656 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1657 int MatStoreValues_MPIAIJ(Mat mat)
1658 {
1659   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1660   int        ierr;
1661 
1662   PetscFunctionBegin;
1663   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1664   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1665   PetscFunctionReturn(0);
1666 }
1667 EXTERN_C_END
1668 
1669 EXTERN_C_BEGIN
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1672 int MatRetrieveValues_MPIAIJ(Mat mat)
1673 {
1674   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1675   int        ierr;
1676 
1677   PetscFunctionBegin;
1678   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1679   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 EXTERN_C_END
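
/*
   Illustrative usage sketch, not part of the library source: the intended
   store/retrieve pattern for recomputing values on a fixed nonzero structure.
   MatStoreValues() requires that no new nonzero locations be created, hence the
   MAT_NO_NEW_NONZERO_LOCATIONS option is assumed to be set first.
*/
#undef __FUNCT__
#define __FUNCT__ "MatStoreRetrieveUsage_Example"
static int MatStoreRetrieveUsage_Example(Mat mat)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
  ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* snapshot the values of both the A and B parts */
  ierr = MatZeroEntries(mat);CHKERRQ(ierr);    /* ... assemble new values here ... */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* restore the snapshot */
  PetscFunctionReturn(0);
}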
1683 
1684 #include "petscpc.h"
1685 EXTERN_C_BEGIN
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1688 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1689 {
1690   Mat_MPIAIJ   *b;
1691   int          ierr,i;
1692 
1693   PetscFunctionBegin;
1694   B->preallocated = PETSC_TRUE;
1695   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1696   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1697   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1698   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1699   if (d_nnz) {
1700     for (i=0; i<B->m; i++) {
1701       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1702     }
1703   }
1704   if (o_nnz) {
1705     for (i=0; i<B->m; i++) {
1706       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1707     }
1708   }
1709   b = (Mat_MPIAIJ*)B->data;
1710 
1711   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1712   PetscLogObjectParent(B,b->A);
1713   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1714   PetscLogObjectParent(B,b->B);
1715 
1716   PetscFunctionReturn(0);
1717 }
1718 EXTERN_C_END
1719 
1720 /*MC
1721    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1722 
1723    Options Database Keys:
1724 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1725 
1726   Level: beginner
1727 
1728 .seealso: MatCreateMPIAIJ
1729 M*/
1730 
1731 EXTERN_C_BEGIN
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatCreate_MPIAIJ"
1734 int MatCreate_MPIAIJ(Mat B)
1735 {
1736   Mat_MPIAIJ *b;
1737   int        ierr,i,size;
1738 
1739   PetscFunctionBegin;
1740   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1741 
1742   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1743   B->data         = (void*)b;
1744   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1745   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1746   B->factor       = 0;
1747   B->assembled    = PETSC_FALSE;
1748   B->mapping      = 0;
1749 
1750   B->insertmode      = NOT_SET_VALUES;
1751   b->size            = size;
1752   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1753 
1754   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1755   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1756 
1757   /* the information in the maps duplicates the information computed below; eventually
1758      we should remove the duplicate information that is not contained in the maps */
1759   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1760   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1761 
1762   /* build local table of row and column ownerships */
1763   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1764   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1765   b->cowners = b->rowners + b->size + 2;
1766   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1767   b->rowners[0] = 0;
1768   for (i=2; i<=b->size; i++) {
1769     b->rowners[i] += b->rowners[i-1];
1770   }
1771   b->rstart = b->rowners[b->rank];
1772   b->rend   = b->rowners[b->rank+1];
1773   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1774   b->cowners[0] = 0;
1775   for (i=2; i<=b->size; i++) {
1776     b->cowners[i] += b->cowners[i-1];
1777   }
1778   b->cstart = b->cowners[b->rank];
1779   b->cend   = b->cowners[b->rank+1];
1780 
1781   /* build cache for off-processor entries formed during assembly */
1782   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1783   b->donotstash  = PETSC_FALSE;
1784   b->colmap      = 0;
1785   b->garray      = 0;
1786   b->roworiented = PETSC_TRUE;
1787 
1788   /* stuff used for matrix vector multiply */
1789   b->lvec      = PETSC_NULL;
1790   b->Mvctx     = PETSC_NULL;
1791 
1792   /* stuff for MatGetRow() */
1793   b->rowindices   = 0;
1794   b->rowvalues    = 0;
1795   b->getrowactive = PETSC_FALSE;
1796 
1797   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1798                                      "MatStoreValues_MPIAIJ",
1799                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1800   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1801                                      "MatRetrieveValues_MPIAIJ",
1802                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1803   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1804 				     "MatGetDiagonalBlock_MPIAIJ",
1805                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1807 				     "MatIsSymmetric_MPIAIJ",
1808 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1810 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1811 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1813 				     "MatDiagonalScaleLocal_MPIAIJ",
1814 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1815   PetscFunctionReturn(0);
1816 }
1817 EXTERN_C_END
1818 
1819 /*MC
1820    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1821 
1822    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1823    and MATMPIAIJ otherwise.
1824 
1825    Options Database Keys:
1826 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1827 
1828   Level: beginner
1829 
1830 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1831 M*/
1832 
1833 EXTERN_C_BEGIN
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatCreate_AIJ"
1836 int MatCreate_AIJ(Mat A) {
1837   int ierr,size;
1838 
1839   PetscFunctionBegin;
1840   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1841   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1842   if (size == 1) {
1843     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1844   } else {
1845     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1846   }
1847   PetscFunctionReturn(0);
1848 }
1849 EXTERN_C_END
1850 
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1853 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1854 {
1855   Mat        mat;
1856   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1857   int        ierr;
1858 
1859   PetscFunctionBegin;
1860   *newmat       = 0;
1861   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1862   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1863   a    = (Mat_MPIAIJ*)mat->data;
1864   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1865   mat->factor       = matin->factor;
1866   mat->assembled    = PETSC_TRUE;
1867   mat->insertmode   = NOT_SET_VALUES;
1868   mat->preallocated = PETSC_TRUE;
1869 
1870   a->rstart       = oldmat->rstart;
1871   a->rend         = oldmat->rend;
1872   a->cstart       = oldmat->cstart;
1873   a->cend         = oldmat->cend;
1874   a->size         = oldmat->size;
1875   a->rank         = oldmat->rank;
1876   a->donotstash   = oldmat->donotstash;
1877   a->roworiented  = oldmat->roworiented;
1878   a->rowindices   = 0;
1879   a->rowvalues    = 0;
1880   a->getrowactive = PETSC_FALSE;
1881 
1882   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1883   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1884   if (oldmat->colmap) {
1885 #if defined (PETSC_USE_CTABLE)
1886     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1887 #else
1888     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1889     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1890     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1891 #endif
1892   } else a->colmap = 0;
1893   if (oldmat->garray) {
1894     int len;
1895     len  = oldmat->B->n;
1896     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1897     PetscLogObjectMemory(mat,len*sizeof(int));
1898     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1899   } else a->garray = 0;
1900 
1901   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1902   PetscLogObjectParent(mat,a->lvec);
1903   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1904   PetscLogObjectParent(mat,a->Mvctx);
1905   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1906   PetscLogObjectParent(mat,a->A);
1907   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1908   PetscLogObjectParent(mat,a->B);
1909   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1910   *newmat = mat;
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #include "petscsys.h"
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatLoad_MPIAIJ"
1918 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1919 {
1920   Mat          A;
1921   PetscScalar  *vals,*svals;
1922   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1923   MPI_Status   status;
1924   int          i,nz,ierr,j,rstart,rend,fd;
1925   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1926   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1927   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1928 
1929   PetscFunctionBegin;
1930   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1931   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1932   if (!rank) {
1933     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1934     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1935     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1936     if (header[3] < 0) {
1937       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1938     }
1939   }
1940 
1941   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1942   M = header[1]; N = header[2];
1943   /* determine ownership of all rows */
1944   m = M/size + ((M % size) > rank);
1945   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1946   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1947   rowners[0] = 0;
1948   for (i=2; i<=size; i++) {
1949     rowners[i] += rowners[i-1];
1950   }
1951   rstart = rowners[rank];
1952   rend   = rowners[rank+1];
1953 
1954   /* distribute row lengths to all processors */
1955   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1956   offlens = ourlens + (rend-rstart);
1957   if (!rank) {
1958     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1959     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1960     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1961     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1962     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1963     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1964   } else {
1965     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1966   }
1967 
1968   if (!rank) {
1969     /* calculate the number of nonzeros on each processor */
1970     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1971     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1972     for (i=0; i<size; i++) {
1973       for (j=rowners[i]; j< rowners[i+1]; j++) {
1974         procsnz[i] += rowlengths[j];
1975       }
1976     }
1977     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1978 
1979     /* determine max buffer needed and allocate it */
1980     maxnz = 0;
1981     for (i=0; i<size; i++) {
1982       maxnz = PetscMax(maxnz,procsnz[i]);
1983     }
1984     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1985 
1986     /* read in my part of the matrix column indices  */
1987     nz   = procsnz[0];
1988     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1989     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1990 
1991     /* read in everyone else's parts and ship them off */
1992     for (i=1; i<size; i++) {
1993       nz   = procsnz[i];
1994       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1995       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1996     }
1997     ierr = PetscFree(cols);CHKERRQ(ierr);
1998   } else {
1999     /* determine buffer space needed for message */
2000     nz = 0;
2001     for (i=0; i<m; i++) {
2002       nz += ourlens[i];
2003     }
2004     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2005 
2006     /* receive message of column indices */
2007     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2008     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2009     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2010   }
2011 
2012   /* determine column ownership if matrix is not square */
2013   if (N != M) {
2014     n      = N/size + ((N % size) > rank);
2015     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2016     cstart = cend - n;
2017   } else {
2018     cstart = rstart;
2019     cend   = rend;
2020     n      = cend - cstart;
2021   }
2022 
2023   /* loop over local rows, determining the number of off-diagonal entries */
2024   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2025   jj = 0;
2026   for (i=0; i<m; i++) {
2027     for (j=0; j<ourlens[i]; j++) {
2028       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2029       jj++;
2030     }
2031   }
2032 
2033   /* create our matrix */
2034   for (i=0; i<m; i++) {
2035     ourlens[i] -= offlens[i];
2036   }
2037   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2038   ierr = MatSetType(A,type);CHKERRQ(ierr);
2039   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2040 
2041   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2042   for (i=0; i<m; i++) {
2043     ourlens[i] += offlens[i];
2044   }
2045 
2046   if (!rank) {
2047     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2048 
2049     /* read in my part of the matrix numerical values  */
2050     nz   = procsnz[0];
2051     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2052 
2053     /* insert into matrix */
2054     jj      = rstart;
2055     smycols = mycols;
2056     svals   = vals;
2057     for (i=0; i<m; i++) {
2058       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2059       smycols += ourlens[i];
2060       svals   += ourlens[i];
2061       jj++;
2062     }
2063 
2064     /* read in the other processors' parts and ship them out */
2065     for (i=1; i<size; i++) {
2066       nz   = procsnz[i];
2067       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2068       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2069     }
2070     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2071   } else {
2072     /* receive numeric values */
2073     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2074 
2075     /* receive message of values */
2076     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2077     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2078     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2079 
2080     /* insert into matrix */
2081     jj      = rstart;
2082     smycols = mycols;
2083     svals   = vals;
2084     for (i=0; i<m; i++) {
2085       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2086       smycols += ourlens[i];
2087       svals   += ourlens[i];
2088       jj++;
2089     }
2090   }
2091   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2092   ierr = PetscFree(vals);CHKERRQ(ierr);
2093   ierr = PetscFree(mycols);CHKERRQ(ierr);
2094   ierr = PetscFree(rowners);CHKERRQ(ierr);
2095 
2096   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2097   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2098   *newmat = A;
2099   PetscFunctionReturn(0);
2100 }
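
/*
   Illustrative usage sketch, not part of the library source: loading a matrix
   written in PETSc binary format through the routine above; rank 0 reads the
   file and ships each processor its block of rows. The file name is
   hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatLoadUsage_Example"
static int MatLoadUsage_Example(MPI_Comm comm)
{
  int         ierr;
  PetscViewer viewer;
  Mat         A;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}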
2101 
2102 #undef __FUNCT__
2103 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2104 /*
2105     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2106   matrix, and then the end result formed by concatenating the local matrices.
2107   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2108 */
2109 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2110 {
2111   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2112   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2113   Mat          *local,M,Mreuse;
2114   PetscScalar  *vwork,*aa;
2115   MPI_Comm     comm = mat->comm;
2116   Mat_SeqAIJ   *aij;
2117 
2118 
2119   PetscFunctionBegin;
2120   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2121   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2122 
2123   if (call ==  MAT_REUSE_MATRIX) {
2124     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2125     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2126     local = &Mreuse;
2127     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2128   } else {
2129     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2130     Mreuse = *local;
2131     ierr   = PetscFree(local);CHKERRQ(ierr);
2132   }
2133 
2134   /*
2135       m - number of local rows
2136       n - number of columns (same on all processors)
2137       rstart - first row in new global matrix generated
2138   */
2139   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2140   if (call == MAT_INITIAL_MATRIX) {
2141     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2142     ii  = aij->i;
2143     jj  = aij->j;
2144 
2145     /*
2146         Determine the number of non-zeros in the diagonal and off-diagonal
2147         portions of the matrix in order to do correct preallocation
2148     */
2149 
2150     /* first get start and end of "diagonal" columns */
2151     if (csize == PETSC_DECIDE) {
2152       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2153       if (mglobal == n) { /* square matrix */
2154 	nlocal = m;
2155       } else {
2156         nlocal = n/size + ((n % size) > rank);
2157       }
2158     } else {
2159       nlocal = csize;
2160     }
2161     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2162     rstart = rend - nlocal;
2163     if (rank == size - 1 && rend != n) {
2164       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2165     }
2166 
2167     /* next, compute all the lengths */
2168     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2169     olens = dlens + m;
2170     for (i=0; i<m; i++) {
2171       jend = ii[i+1] - ii[i];
2172       olen = 0;
2173       dlen = 0;
2174       for (j=0; j<jend; j++) {
2175         if (*jj < rstart || *jj >= rend) olen++;
2176         else dlen++;
2177         jj++;
2178       }
2179       olens[i] = olen;
2180       dlens[i] = dlen;
2181     }
2182     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2183     ierr = PetscFree(dlens);CHKERRQ(ierr);
2184   } else {
2185     int ml,nl;
2186 
2187     M = *newmat;
2188     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2189     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2190     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2191     /*
2192          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2193        rather than the slower MatSetValues().
2194     */
2195     M->was_assembled = PETSC_TRUE;
2196     M->assembled     = PETSC_FALSE;
2197   }
2198   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2199   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2200   ii  = aij->i;
2201   jj  = aij->j;
2202   aa  = aij->a;
2203   for (i=0; i<m; i++) {
2204     row   = rstart + i;
2205     nz    = ii[i+1] - ii[i];
2206     cwork = jj;     jj += nz;
2207     vwork = aa;     aa += nz;
2208     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2209   }
2210 
2211   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2212   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2213   *newmat = M;
2214 
2215   /* save the submatrix used on this processor for the next request */
2216   if (call ==  MAT_INITIAL_MATRIX) {
2217     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2218     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2219   }
2220 
2221   PetscFunctionReturn(0);
2222 }
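
/*
   Illustrative usage sketch, not part of the library source: extracting a
   parallel submatrix, then refilling it later with MAT_REUSE_MATRIX, which
   depends on the "SubMatrix" object composed above. The index sets here simply
   select the locally owned rows and the matching columns.
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrixUsage_Example"
static int MatGetSubMatrixUsage_Example(Mat mat)
{
  int ierr,rstart,rend;
  IS  isrow,iscol;
  Mat sub;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
  /* ... later, after the values (but not the structure) of mat change ... */
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  ierr = MatDestroy(sub);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}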
2223 
2224 #undef __FUNCT__
2225 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2226 /*@C
2227    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2228    (the default parallel PETSc format).  For good matrix assembly performance
2229    the user should preallocate the matrix storage by setting the parameters
2230    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2231    performance can be increased by more than a factor of 50.
2232 
2233    Collective on MPI_Comm
2234 
2235    Input Parameters:
2236 +  A - the matrix
2237 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2238            (same value is used for all local rows)
2239 .  d_nnz - array containing the number of nonzeros in the various rows of the
2240            DIAGONAL portion of the local submatrix (possibly different for each row)
2241            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2242            The size of this array is equal to the number of local rows, i.e. 'm'.
2243            You must leave room for the diagonal entry even if it is zero.
2244 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2245            submatrix (same value is used for all local rows).
2246 -  o_nnz - array containing the number of nonzeros in the various rows of the
2247            OFF-DIAGONAL portion of the local submatrix (possibly different for
2248            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2249            structure. The size of this array is equal to the number
2250            of local rows, i.e. 'm'.
2251 
2252    The AIJ format (also called the Yale sparse matrix format or
2253    compressed row storage) is fully compatible with standard Fortran 77
2254    storage.  That is, the stored row and column indices can begin at
2255    either one (as in Fortran) or zero.  See the users manual for details.
2256 
2257    The user MUST specify either the local or global matrix dimensions
2258    (possibly both).
2259 
2260    The parallel matrix is partitioned such that the first m0 rows belong to
2261    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2262    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2263 
2264    The DIAGONAL portion of the local submatrix of a processor can be defined
2265    as the submatrix obtained by extracting the part corresponding
2266    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2267    first row that belongs to the processor, and r2 is the last row belonging
2268    to this processor. This is a square mxm matrix. The remaining portion
2269    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2270 
2271    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2272 
2273    By default, this format uses inodes (identical nodes) when possible.
2274    We search for consecutive rows with the same nonzero structure, thereby
2275    reusing matrix information to achieve increased efficiency.
2276 
2277    Options Database Keys:
2278 +  -mat_aij_no_inode  - Do not use inodes
2279 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2280 -  -mat_aij_oneindex - Internally use indexing starting at 1
2281         rather than 0.  Note that when calling MatSetValues(),
2282         the user still MUST index entries starting at 0!
2283 
2284    Example usage:
2285 
2286    Consider the following 8x8 matrix with 34 nonzero values that is
2287    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2288    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2289    as follows:
2290 
2291 .vb
2292             1  2  0  |  0  3  0  |  0  4
2293     Proc0   0  5  6  |  7  0  0  |  8  0
2294             9  0 10  | 11  0  0  | 12  0
2295     -------------------------------------
2296            13  0 14  | 15 16 17  |  0  0
2297     Proc1   0 18  0  | 19 20 21  |  0  0
2298             0  0  0  | 22 23  0  | 24  0
2299     -------------------------------------
2300     Proc2  25 26 27  |  0  0 28  | 29  0
2301            30  0  0  | 31 32 33  |  0 34
2302 .ve
2303 
2304    This can be represented as a collection of submatrices as:
2305 
2306 .vb
2307       A B C
2308       D E F
2309       G H I
2310 .ve
2311 
2312    Where the submatrices A,B,C are owned by proc0, D,E,F are
2313    owned by proc1, G,H,I are owned by proc2.
2314 
2315    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2316    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2317    The 'M','N' parameters are 8,8, and have the same values on all procs.
2318 
2319    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2320    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2321    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2322    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2323    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2324    matrix, and [DF] as another SeqAIJ matrix.
2325 
2326    When d_nz, o_nz parameters are specified, d_nz storage elements are
2327    allocated for every row of the local diagonal submatrix, and o_nz
2328    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2329    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2330    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2331    In this case, the values of d_nz,o_nz are:
2332 .vb
2333      proc0 : d_nz = 2, o_nz = 2
2334      proc1 : d_nz = 3, o_nz = 2
2335      proc2 : d_nz = 1, o_nz = 4
2336 .ve
2337    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2338    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2339    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2340    34 values.
2341 
2342    When d_nnz, o_nnz parameters are specified, the storage is specified
2343    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2344    In the above case the values for d_nnz,o_nnz are:
2345 .vb
2346      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2347      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2348      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2349 .ve
2350    Here the space allocated is the sum of all the above values, i.e. 34, and
2351    hence the preallocation is perfect.
2352 
2353    Level: intermediate
2354 
2355 .keywords: matrix, aij, compressed row, sparse, parallel
2356 
2357 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2358 @*/
2359 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2360 {
2361   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2362 
2363   PetscFunctionBegin;
2364   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2365   if (f) {
2366     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2367   }
2368   PetscFunctionReturn(0);
2369 }
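
/*
   Illustrative usage sketch, not part of the library source: the
   MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() construction sequence
   described in the manual page above. The per-row estimates (5 diagonal-block
   and 2 off-diagonal-block nonzeros) are hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJPreallocationUsage_Example"
static int MatMPIAIJPreallocationUsage_Example(MPI_Comm comm,int m,int n,Mat *A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}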
2370 
2371 #undef __FUNCT__
2372 #define __FUNCT__ "MatCreateMPIAIJ"
2373 /*@C
2374    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2375    (the default parallel PETSc format).  For good matrix assembly performance
2376    the user should preallocate the matrix storage by setting the parameters
2377    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2378    performance can be increased by more than a factor of 50.
2379 
2380    Collective on MPI_Comm
2381 
2382    Input Parameters:
2383 +  comm - MPI communicator
2384 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2385            This value should be the same as the local size used in creating the
2386            y vector for the matrix-vector product y = Ax.
2387 .  n - This value should be the same as the local size used in creating the
2388        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2389        calculated if N is given) For square matrices n is almost always m.
2390 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2391 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2392 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2393            (same value is used for all local rows)
2394 .  d_nnz - array containing the number of nonzeros in the various rows of the
2395            DIAGONAL portion of the local submatrix (possibly different for each row)
2396            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2397            The size of this array is equal to the number of local rows, i.e. 'm'.
2398            You must leave room for the diagonal entry even if it is zero.
2399 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2400            submatrix (same value is used for all local rows).
2401 -  o_nnz - array containing the number of nonzeros in the various rows of the
2402            OFF-DIAGONAL portion of the local submatrix (possibly different for
2403            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2404            structure. The size of this array is equal to the number
2405            of local rows, i.e. 'm'.
2406 
2407    Output Parameter:
2408 .  A - the matrix
2409 
2410    Notes:
2411    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2412    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2413    storage requirements for this matrix.
2414 
2415    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2416    processor then it must be used on all processors that share the object for
2417    that argument.
2418 
2419    The AIJ format (also called the Yale sparse matrix format or
2420    compressed row storage) is fully compatible with standard Fortran 77
2421    storage.  That is, the stored row and column indices can begin at
2422    either one (as in Fortran) or zero.  See the users manual for details.
2423 
2424    The user MUST specify either the local or global matrix dimensions
2425    (possibly both).
2426 
2427    The parallel matrix is partitioned such that the first m0 rows belong to
2428    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2429    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2430 
2431    The DIAGONAL portion of the local submatrix of a processor can be defined
2432    as the submatrix obtained by extracting the part corresponding
2433    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2434    first row that belongs to the processor, and r2 is the last row belonging
2435    to this processor. This is a square mxm matrix. The remaining portion
2436    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2437 
2438    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2439 
2440    When calling this routine with a single process communicator, a matrix of
2441    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2442    type of communicator, use the construction mechanism:
2443      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2444 
2445    By default, this format uses inodes (identical nodes) when possible.
2446    We search for consecutive rows with the same nonzero structure, thereby
2447    reusing matrix information to achieve increased efficiency.
2448 
2449    Options Database Keys:
2450 +  -mat_aij_no_inode  - Do not use inodes
2451 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2452 -  -mat_aij_oneindex - Internally use indexing starting at 1
2453         rather than 0.  Note that when calling MatSetValues(),
2454         the user still MUST index entries starting at 0!
2455 
2456 
2457    Example usage:
2458 
2459    Consider the following 8x8 matrix with 34 nonzero values that is
2460    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2461    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2462    as follows:
2463 
2464 .vb
2465             1  2  0  |  0  3  0  |  0  4
2466     Proc0   0  5  6  |  7  0  0  |  8  0
2467             9  0 10  | 11  0  0  | 12  0
2468     -------------------------------------
2469            13  0 14  | 15 16 17  |  0  0
2470     Proc1   0 18  0  | 19 20 21  |  0  0
2471             0  0  0  | 22 23  0  | 24  0
2472     -------------------------------------
2473     Proc2  25 26 27  |  0  0 28  | 29  0
2474            30  0  0  | 31 32 33  |  0 34
2475 .ve
2476 
2477    This can be represented as a collection of submatrices as:
2478 
2479 .vb
2480       A B C
2481       D E F
2482       G H I
2483 .ve
2484 
2485    Where the submatrices A,B,C are owned by proc0, D,E,F are
2486    owned by proc1, G,H,I are owned by proc2.
2487 
2488    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2489    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2490    The 'M','N' parameters are 8,8, and have the same values on all procs.
2491 
2492    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2493    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2494    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2495    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2496    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2497    matrix, and [DF] as another SeqAIJ matrix.
2498 
2499    When d_nz, o_nz parameters are specified, d_nz storage elements are
2500    allocated for every row of the local diagonal submatrix, and o_nz
2501    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2502    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2503    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2504    In this case, the values of d_nz,o_nz are:
2505 .vb
2506      proc0 : d_nz = 2, o_nz = 2
2507      proc1 : d_nz = 3, o_nz = 2
2508      proc2 : d_nz = 1, o_nz = 4
2509 .ve
2510    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2511    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2512    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2513    34 values.
2514 
2515    When d_nnz, o_nnz parameters are specified, the storage is specified
2516    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2517    In the above case the values for d_nnz,o_nnz are:
2518 .vb
2519      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2520      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2521      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2522 .ve
2523    Here the space allocated is the sum of all the above values, i.e. 34, and
2524    hence the preallocation is perfect.
2525 
2526    Level: intermediate
2527 
2528 .keywords: matrix, aij, compressed row, sparse, parallel
2529 
2530 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2531 @*/
2532 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2533 {
2534   int ierr,size;
2535 
2536   PetscFunctionBegin;
2537   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2538   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2539   if (size > 1) {
2540     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2541     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2542   } else {
2543     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2544     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2545   }
2546   PetscFunctionReturn(0);
2547 }
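
/*
   Illustrative usage sketch, not part of the library source: assembling a
   distributed tridiagonal matrix with MatCreateMPIAIJ(). At most 3 nonzeros
   appear in any row, so d_nz = 3 and o_nz = 1 is a safe (if slightly generous)
   preallocation. The stencil values are hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJUsage_Example"
static int MatCreateMPIAIJUsage_Example(MPI_Comm comm,int M,Mat *A)
{
  int         ierr,i,rstart,rend,nc,cols[3];
  PetscScalar vals[3];

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,M,3,PETSC_NULL,1,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    nc = 0;
    if (i > 0)   {cols[nc] = i-1; vals[nc] = -1.0; nc++;}
    cols[nc] = i; vals[nc] = 2.0; nc++;
    if (i < M-1) {cols[nc] = i+1; vals[nc] = -1.0; nc++;}
    ierr = MatSetValues(*A,1,&i,nc,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}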
2548 
2549 #undef __FUNCT__
2550 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2551 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2552 {
2553   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2554   PetscFunctionBegin;
2555   *Ad     = a->A;
2556   *Ao     = a->B;
2557   *colmap = a->garray;
2558   PetscFunctionReturn(0);
2559 }
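
/*
   Illustrative usage sketch, not part of the library source: the accessor above
   exposes the diagonal block Ad, the column-compressed off-diagonal block Ao,
   and the garray map that converts a local column index of Ao into a global
   column number.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJUsage_Example"
static int MatMPIAIJGetSeqAIJUsage_Example(Mat A)
{
  int ierr,n,*colmap;
  Mat Ad,Ao;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  ierr = MatGetSize(Ao,0,&n);CHKERRQ(ierr); /* number of compressed off-diagonal columns */
  if (n) { /* local column 0 of Ao corresponds to global column colmap[0] */
    ierr = PetscPrintf(PETSC_COMM_SELF,"first off-diagonal column: %d\n",colmap[0]);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}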
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2563 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2564 {
2565   int        ierr,i;
2566   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2567 
2568   PetscFunctionBegin;
2569   if (coloring->ctype == IS_COLORING_LOCAL) {
2570     ISColoringValue *allcolors,*colors;
2571     ISColoring      ocoloring;
2572 
2573     /* set coloring for diagonal portion */
2574     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2575 
2576     /* set coloring for off-diagonal portion */
2577     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2578     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2579     for (i=0; i<a->B->n; i++) {
2580       colors[i] = allcolors[a->garray[i]];
2581     }
2582     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2583     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2584     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2585     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2586   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2587     ISColoringValue *colors;
2588     int             *larray;
2589     ISColoring      ocoloring;
2590 
2591     /* set coloring for diagonal portion */
2592     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2593     for (i=0; i<a->A->n; i++) {
2594       larray[i] = i + a->cstart;
2595     }
2596     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2597     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2598     for (i=0; i<a->A->n; i++) {
2599       colors[i] = coloring->colors[larray[i]];
2600     }
2601     ierr = PetscFree(larray);CHKERRQ(ierr);
2602     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2603     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2604     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2605 
2606     /* set coloring for off-diagonal portion */
2607     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2608     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2609     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2610     for (i=0; i<a->B->n; i++) {
2611       colors[i] = coloring->colors[larray[i]];
2612     }
2613     ierr = PetscFree(larray);CHKERRQ(ierr);
2614     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2615     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2616     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2617   } else {
2618     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2619   }
2620 
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 #undef __FUNCT__
2625 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2626 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2627 {
2628   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2629   int        ierr;
2630 
2631   PetscFunctionBegin;
2632   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2633   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2634   PetscFunctionReturn(0);
2635 }
2636 
2637 #undef __FUNCT__
2638 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2639 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2640 {
2641   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2642   int        ierr;
2643 
2644   PetscFunctionBegin;
2645   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2646   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2647   PetscFunctionReturn(0);
2648 }
2649 
2650 #undef __FUNCT__
2651 #define __FUNCT__ "MatMerge"
2652 /*@C
2653       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2654                  matrices from each processor
2655 
2656     Collective on MPI_Comm
2657 
2658    Input Parameters:
2659 +    comm - the communicator the parallel matrix will live on
2660 -    inmat - the input sequential matrix (one per processor)
2661 
2662    Output Parameter:
2663 .    outmat - the parallel matrix generated
2664 
2665     Level: advanced
2666 
2667    Notes: The number of columns of the input matrix on EACH of the separate
2668       processors MUST be the same. This routine destroys the input matrix inmat.
2669 
2670 @*/
2671 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2672 {
2673   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2674   PetscScalar *values;
2675   PetscMap    columnmap,rowmap;
2676 
2677   PetscFunctionBegin;
2678 
2679   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2680 
2681   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2682   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2683   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2684   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2685   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2686   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2687 
2688   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2689   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2690   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2691   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2692   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2693 
2694   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2695   for (i=0;i<m;i++) {
2696     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2697     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2698     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2699   }
2700   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2701   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2702 
2703   for (i=0;i<m;i++) {
2704     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2705     I    = i + rstart;
2706     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2707     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2708   }
2709   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2710   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2711   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2712 
2713   PetscFunctionReturn(0);
2714 }
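
/*
   Illustrative usage sketch, not part of the library source: each processor
   contributes a sequential matrix with the same number of columns, and
   MatMerge() stacks them by rows into one parallel matrix, destroying the
   inputs. The sizes and values are hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMergeUsage_Example"
static int MatMergeUsage_Example(MPI_Comm comm,Mat *merged)
{
  int         ierr,zero = 0;
  Mat         local;
  PetscScalar one = 1.0;

  PetscFunctionBegin;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,8,1,PETSC_NULL,&local);CHKERRQ(ierr);
  ierr = MatSetValues(local,1,&zero,1,&zero,&one,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatMerge(comm,local,merged);CHKERRQ(ierr); /* note: destroys local */
  PetscFunctionReturn(0);
}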
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatFileSplit"
2718 int MatFileSplit(Mat A,char *outfile)
2719 {
2720   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2721   PetscViewer out;
2722   char        *name;
2723   Mat         B;
2724   PetscScalar *values;
2725 
2726   PetscFunctionBegin;
2727 
2728   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2729   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2730   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2731   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2732   for (i=0;i<m;i++) {
2733     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2734     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2735     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2736   }
2737   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2738   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2739 
2740   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2741   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2742   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2743   sprintf(name,"%s.%d",outfile,rank);
2744   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2745   ierr = PetscFree(name);CHKERRQ(ierr);
2746   ierr = MatView(B,out);CHKERRQ(ierr);
2747   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2748   ierr = MatDestroy(B);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751