/* src/mat/impls/aij/mpi/mpiaij.c (revision 9af31e4ad595286b4e2df8194fee047feeccfe42) */

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable
  at a slightly higher hash-table cost; without it, it is not scalable (each
  processor has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
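
/*
   Note on the colmap convention: local off-diagonal column i is stored as
   i+1, so a lookup that yields 0 (hence col = -1 after the decrement done
   at the call sites below) means the global column is not present in the
   off-diagonal part on this process.
*/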
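
/*
   The two macros below inline the insertion logic of MatSetValues_SeqAIJ()
   for the diagonal (A) and off-diagonal (B) blocks: locate the column within
   the row by binary search, add or insert the value, and, if the row has no
   free slot left, reallocate the entire matrix storage with CHUNKSIZE extra
   entries for this row.
*/
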
#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

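/*
   MatSetValues for the parallel AIJ format: locally owned rows are inserted
   directly into the diagonal block A (columns in [cstart,cend)) or into the
   off-diagonal block B (all other columns, translated to local indices via
   colmap once the matrix has been assembled); rows owned by other processes
   are queued in the stash and communicated at assembly time.
*/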
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift=0;
  PetscScalar  *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

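/*
   Typical caller-side usage (a sketch, not part of this file): values are
   set through the generic MatSetValues() interface and the stash is flushed
   by the assembly pair,

     ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
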
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

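/*
   MatAssemblyEnd drains the stash: within each received message, runs of
   entries sharing the same row index are assembled with a single call to
   MatSetValues_MPIAIJ().
*/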
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

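/*
   MatZeroRows: the rows to zero may be owned by any process, so each process
   first routes every requested row to its owner (one MPI_Isend per
   destination, with message counts and lengths obtained via PetscMaxSum),
   then each owner zeros the local rows it received, optionally setting the
   diagonal entries.
*/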
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     thus avoiding lots of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

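/*
   MatMult overlaps communication with computation: the scatter of the ghost
   values of xx into lvec is started, the diagonal block is applied while the
   messages are in flight, and the off-diagonal contribution B*lvec is added
   once the scatter has completed.
*/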
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into yy until VecScatterEnd() below, which holds for the
     current implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm   comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS         Me,Notme;
  int        M,N,first,last,*notme,ntids,i,ierr;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
  if (ntids==1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into zz until VecScatterEnd() below, which holds for the
     current implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is
  the diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

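/*
   Binary viewer: process 0 writes the header (cookie, M, N, total nonzeros),
   then gathers and writes the row lengths, global column indices, and values
   of every process in turn; the columns of each row are emitted in global
   order by merging the diagonal and off-diagonal parts around cstart.
*/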
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  int               ierr,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

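/*
   MatRelax implements local (processor-block) SOR: each outer iteration
   scatters the current solution into lvec, forms bb1 = bb - B*lvec so that
   the off-process coupling appears in the right-hand side, and then runs
   the requested sweep of the sequential relaxation on the diagonal block.
*/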
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  int          ierr;
  Vec          bb1;
  PetscScalar  mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

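/*
   MatGetRow returns one locally owned row with global column indices: the
   corresponding rows of the diagonal and off-diagonal blocks are merged so
   that columns appear in increasing global order (off-diagonal columns
   below cstart first, then the diagonal block, then the remaining
   off-diagonal columns).
*/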
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int          *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int          ierr,i,j,cstart = aij->cstart;
  PetscReal    sum = 0.0;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

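/*
   MatTranspose builds the transpose by inserting every stored (row,col,val)
   of the local blocks as (col,row,val) into a new parallel matrix whose row
   and column layouts are swapped, then assembles it; in the in-place
   (square) case the new matrix replaces the old header via MatHeaderCopy().
*/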
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatTranspose_MPIAIJ"
1358 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1359 {
1360   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1361   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1362   int          ierr;
1363   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1364   Mat          B;
1365   PetscScalar  *array;
1366 
1367   PetscFunctionBegin;
1368   if (!matout && M != N) {
1369     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1370   }
1371 
1372   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1373   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1374   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1375 
1376   /* copy over the A part */
1377   Aloc = (Mat_SeqAIJ*)a->A->data;
1378   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1379   row = a->rstart;
1380   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
1381   for (i=0; i<m; i++) {
1382     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1383     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1384   }
1385   aj = Aloc->j;
1386   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}
1387 
1388   /* copy over the B part */
1389   Aloc = (Mat_SeqAIJ*)a->B->data;
1390   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1391   row  = a->rstart;
1392   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1393   ct   = cols;
1394   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1395   for (i=0; i<m; i++) {
1396     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1397     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1398   }
1399   ierr = PetscFree(ct);CHKERRQ(ierr);
1400   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1401   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1402   if (matout) {
1403     *matout = B;
1404   } else {
1405     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1412 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1413 {
1414   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1415   Mat        a = aij->A,b = aij->B;
1416   int        ierr,s1,s2,s3;
1417 
1418   PetscFunctionBegin;
1419   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1420   if (rr) {
1421     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1422     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1423     /* Overlap communication with computation. */
1424     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1425   }
1426   if (ll) {
1427     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1428     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1429     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1430   }
1431   /* scale the diagonal block */
1432   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1433 
1434   if (rr) {
1435     /* Do a scatter end and then right scale the off-diagonal block */
1436     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1437     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1438   }
1439 
1440   PetscFunctionReturn(0);
1441 }
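
/*
   Illustrative sketch, assuming vectors l and r (both hypothetical) with the row
   and column layouts of A respectively: the routine above computes
   A <- diag(l) A diag(r), overlapping the scatter of r with the scaling of the
   diagonal block.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);            scale rows by l and columns by r
     ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);   column scaling only
*/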
1442 
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1446 int MatPrintHelp_MPIAIJ(Mat A)
1447 {
1448   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1449   int        ierr;
1450 
1451   PetscFunctionBegin;
1452   if (!a->rank) {
1453     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1460 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1461 {
1462   PetscFunctionBegin;
1463   *bs = 1;
1464   PetscFunctionReturn(0);
1465 }

1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1468 int MatSetUnfactored_MPIAIJ(Mat A)
1469 {
1470   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1471   int        ierr;
1472 
1473   PetscFunctionBegin;
1474   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatEqual_MPIAIJ"
1480 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1481 {
1482   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1483   Mat        a,b,c,d;
1484   PetscTruth flg;
1485   int        ierr;
1486 
1487   PetscFunctionBegin;
1488   a = matA->A; b = matA->B;
1489   c = matB->A; d = matB->B;
1490 
1491   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1492   if (flg == PETSC_TRUE) {
1493     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1494   }
1495   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 #undef __FUNCT__
1500 #define __FUNCT__ "MatCopy_MPIAIJ"
1501 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1502 {
1503   int        ierr;
1504   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1505   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1506 
1507   PetscFunctionBegin;
1508   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1509   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1510     /* Because of the column compression in the off-processor part of the matrix a->B,
1511        the number of columns in a->B and b->B may be different; hence we cannot call
1512        MatCopy() directly on the two parts. If need be, we can provide a copy more
1513        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1514        then copying the submatrices */
1515     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1516   } else {
1517     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1518     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1519   }
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1525 int MatSetUpPreallocation_MPIAIJ(Mat A)
1526 {
1527   int        ierr;
1528 
1529   PetscFunctionBegin;
1530   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 #include "petscblaslapack.h"
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatAXPY_MPIAIJ"
1537 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1538 {
1539   int        ierr,one=1,i;
1540   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1541   Mat_SeqAIJ *x,*y;
1542 
1543   PetscFunctionBegin;
1544   if (str == SAME_NONZERO_PATTERN) {
1545     x = (Mat_SeqAIJ *)xx->A->data;
1546     y = (Mat_SeqAIJ *)yy->A->data;
1547     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1548     x = (Mat_SeqAIJ *)xx->B->data;
1549     y = (Mat_SeqAIJ *)yy->B->data;
1550     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1551   } else if (str == SUBSET_NONZERO_PATTERN) {
1552     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1553 
1554     x = (Mat_SeqAIJ *)xx->B->data;
1555     y = (Mat_SeqAIJ *)yy->B->data;
1556     if (y->xtoy && y->XtoY != xx->B) {
1557       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1558       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1559     }
1560     if (!y->xtoy) { /* get xtoy */
1561       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1562       y->XtoY = xx->B;
1563     }
1564     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1565   } else {
1566     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 /* -------------------------------------------------------------------*/
1572 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1573        MatGetRow_MPIAIJ,
1574        MatRestoreRow_MPIAIJ,
1575        MatMult_MPIAIJ,
1576 /* 4*/ MatMultAdd_MPIAIJ,
1577        MatMultTranspose_MPIAIJ,
1578        MatMultTransposeAdd_MPIAIJ,
1579        0,
1580        0,
1581        0,
1582 /*10*/ 0,
1583        0,
1584        0,
1585        MatRelax_MPIAIJ,
1586        MatTranspose_MPIAIJ,
1587 /*15*/ MatGetInfo_MPIAIJ,
1588        MatEqual_MPIAIJ,
1589        MatGetDiagonal_MPIAIJ,
1590        MatDiagonalScale_MPIAIJ,
1591        MatNorm_MPIAIJ,
1592 /*20*/ MatAssemblyBegin_MPIAIJ,
1593        MatAssemblyEnd_MPIAIJ,
1594        0,
1595        MatSetOption_MPIAIJ,
1596        MatZeroEntries_MPIAIJ,
1597 /*25*/ MatZeroRows_MPIAIJ,
1598 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1599        MatLUFactorSymbolic_MPIAIJ_TFS,
1600 #else
1601        0,
1602 #endif
1603        0,
1604        0,
1605        0,
1606 /*30*/ MatSetUpPreallocation_MPIAIJ,
1607        0,
1608        0,
1609        0,
1610        0,
1611 /*35*/ MatDuplicate_MPIAIJ,
1612        0,
1613        0,
1614        0,
1615        0,
1616 /*40*/ MatAXPY_MPIAIJ,
1617        MatGetSubMatrices_MPIAIJ,
1618        MatIncreaseOverlap_MPIAIJ,
1619        MatGetValues_MPIAIJ,
1620        MatCopy_MPIAIJ,
1621 /*45*/ MatPrintHelp_MPIAIJ,
1622        MatScale_MPIAIJ,
1623        0,
1624        0,
1625        0,
1626 /*50*/ MatGetBlockSize_MPIAIJ,
1627        0,
1628        0,
1629        0,
1630        0,
1631 /*55*/ MatFDColoringCreate_MPIAIJ,
1632        0,
1633        MatSetUnfactored_MPIAIJ,
1634        0,
1635        0,
1636 /*60*/ MatGetSubMatrix_MPIAIJ,
1637        MatDestroy_MPIAIJ,
1638        MatView_MPIAIJ,
1639        MatGetPetscMaps_Petsc,
1640        0,
1641 /*65*/ 0,
1642        0,
1643        0,
1644        0,
1645        0,
1646 /*70*/ 0,
1647        0,
1648        MatSetColoring_MPIAIJ,
1649        MatSetValuesAdic_MPIAIJ,
1650        MatSetValuesAdifor_MPIAIJ,
1651 /*75*/ 0,
1652        0,
1653        0,
1654        0,
1655        0,
1656 /*80*/ 0,
1657        0,
1658        0,
1659        0,
1660 /*85*/ MatLoad_MPIAIJ,
1661        0,
1662        0,
1663        0,
1664        0,
1665 /*90*/ 0,
1666        MatMatMult_MPIAIJ_MPIAIJ,
1667        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1668        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1669 };
1670 
1671 /* ----------------------------------------------------------------------------------------*/
1672 
1673 EXTERN_C_BEGIN
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1676 int MatStoreValues_MPIAIJ(Mat mat)
1677 {
1678   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1679   int        ierr;
1680 
1681   PetscFunctionBegin;
1682   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1683   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
1686 EXTERN_C_END
1687 
1688 EXTERN_C_BEGIN
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1691 int MatRetrieveValues_MPIAIJ(Mat mat)
1692 {
1693   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1694   int        ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1698   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 EXTERN_C_END
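
/*
   Illustrative sketch of the store/retrieve pair above: it supports repeated
   numerical refill of a matrix whose nonzero pattern is frozen, e.g. inside a
   nonlinear or time-stepping loop.  The caller code here is hypothetical.

     ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);       stash the current numerical values
       ... modify values with MatSetValues() and reassemble ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);    restore the stashed values
*/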
1702 
1703 #include "petscpc.h"
1704 EXTERN_C_BEGIN
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1707 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1708 {
1709   Mat_MPIAIJ   *b;
1710   int          ierr,i;
1711 
1712   PetscFunctionBegin;
1713   B->preallocated = PETSC_TRUE;
1714   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1715   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1716   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1717   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1718   if (d_nnz) {
1719     for (i=0; i<B->m; i++) {
1720       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1721     }
1722   }
1723   if (o_nnz) {
1724     for (i=0; i<B->m; i++) {
1725       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1726     }
1727   }
1728   b = (Mat_MPIAIJ*)B->data;
1729   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1730   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1731 
1732   PetscFunctionReturn(0);
1733 }
1734 EXTERN_C_END
1735 
1736 /*MC
1737    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1738 
1739    Options Database Keys:
1740 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1741 
1742   Level: beginner
1743 
1744 .seealso: MatCreateMPIAIJ
1745 M*/
1746 
1747 EXTERN_C_BEGIN
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatCreate_MPIAIJ"
1750 int MatCreate_MPIAIJ(Mat B)
1751 {
1752   Mat_MPIAIJ *b;
1753   int        ierr,i,size;
1754 
1755   PetscFunctionBegin;
1756   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1757 
1758   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1759   B->data         = (void*)b;
1760   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1761   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1762   B->factor       = 0;
1763   B->assembled    = PETSC_FALSE;
1764   B->mapping      = 0;
1765 
1766   B->insertmode      = NOT_SET_VALUES;
1767   b->size            = size;
1768   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1769 
1770   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1771   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1772 
1773   /* the information in the maps duplicates the information computed below, eventually
1774      we should remove the duplicate information that is not contained in the maps */
1775   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1776   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1777 
1778   /* build local table of row and column ownerships */
1779   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1780   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1781   b->cowners = b->rowners + b->size + 2;
1782   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1783   b->rowners[0] = 0;
1784   for (i=2; i<=b->size; i++) {
1785     b->rowners[i] += b->rowners[i-1];
1786   }
1787   b->rstart = b->rowners[b->rank];
1788   b->rend   = b->rowners[b->rank+1];
1789   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1790   b->cowners[0] = 0;
1791   for (i=2; i<=b->size; i++) {
1792     b->cowners[i] += b->cowners[i-1];
1793   }
1794   b->cstart = b->cowners[b->rank];
1795   b->cend   = b->cowners[b->rank+1];
1796 
1797   /* build cache (stash) for matrix entries destined for other processes */
1798   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1799   b->donotstash  = PETSC_FALSE;
1800   b->colmap      = 0;
1801   b->garray      = 0;
1802   b->roworiented = PETSC_TRUE;
1803 
1804   /* stuff used for matrix vector multiply */
1805   b->lvec      = PETSC_NULL;
1806   b->Mvctx     = PETSC_NULL;
1807 
1808   /* stuff for MatGetRow() */
1809   b->rowindices   = 0;
1810   b->rowvalues    = 0;
1811   b->getrowactive = PETSC_FALSE;
1812 
1813   /* Explicitly create 2 MATSEQAIJ matrices. */
1814   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1815   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1816   PetscLogObjectParent(B,b->A);
1817   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1818   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1819   PetscLogObjectParent(B,b->B);
1820 
1821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1822                                      "MatStoreValues_MPIAIJ",
1823                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1825                                      "MatRetrieveValues_MPIAIJ",
1826                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1828 				     "MatGetDiagonalBlock_MPIAIJ",
1829                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1831 				     "MatIsTranspose_MPIAIJ",
1832 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1834 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1835 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1837 				     "MatDiagonalScaleLocal_MPIAIJ",
1838 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 EXTERN_C_END
1842 
1843 /*MC
1844    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1845 
1846    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1847    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1848    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is
1849    supported for communicators controlling multiple processes.  It is recommended that you
1850    call both of the above preallocation routines for simplicity.
1851 
1852    Options Database Keys:
1853 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1854 
1855   Level: beginner
1856 
1857 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1858 M*/
1859 
1860 EXTERN_C_BEGIN
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatCreate_AIJ"
1863 int MatCreate_AIJ(Mat A)
{
1864   int ierr,size;
1865 
1866   PetscFunctionBegin;
1867   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1868   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1869   if (size == 1) {
1870     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1871   } else {
1872     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1873   }
1874   PetscFunctionReturn(0);
1875 }
1876 EXTERN_C_END
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1880 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1881 {
1882   Mat        mat;
1883   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1884   int        ierr;
1885 
1886   PetscFunctionBegin;
1887   *newmat       = 0;
1888   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1889   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1890   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1891   a    = (Mat_MPIAIJ*)mat->data;
1892 
1893   mat->factor       = matin->factor;
1894   mat->assembled    = PETSC_TRUE;
1895   mat->insertmode   = NOT_SET_VALUES;
1896   mat->preallocated = PETSC_TRUE;
1897 
1898   a->rstart       = oldmat->rstart;
1899   a->rend         = oldmat->rend;
1900   a->cstart       = oldmat->cstart;
1901   a->cend         = oldmat->cend;
1902   a->size         = oldmat->size;
1903   a->rank         = oldmat->rank;
1904   a->donotstash   = oldmat->donotstash;
1905   a->roworiented  = oldmat->roworiented;
1906   a->rowindices   = 0;
1907   a->rowvalues    = 0;
1908   a->getrowactive = PETSC_FALSE;
1909 
1910   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1911   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1912   if (oldmat->colmap) {
1913 #if defined (PETSC_USE_CTABLE)
1914     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1915 #else
1916     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1917     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1918     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1919 #endif
1920   } else a->colmap = 0;
1921   if (oldmat->garray) {
1922     int len;
1923     len  = oldmat->B->n;
1924     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1925     PetscLogObjectMemory(mat,len*sizeof(int));
1926     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1927   } else a->garray = 0;
1928 
1929   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1930   PetscLogObjectParent(mat,a->lvec);
1931   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1932   PetscLogObjectParent(mat,a->Mvctx);
1933   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1934   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1935   PetscLogObjectParent(mat,a->A);
1936   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1937   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1938   PetscLogObjectParent(mat,a->B);
1939   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1940   *newmat = mat;
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 #include "petscsys.h"
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatLoad_MPIAIJ"
1948 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1949 {
1950   Mat          A;
1951   PetscScalar  *vals,*svals;
1952   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1953   MPI_Status   status;
1954   int          i,nz,ierr,j,rstart,rend,fd;
1955   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1956   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1957   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1958 
1959   PetscFunctionBegin;
1960   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1961   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1962   if (!rank) {
1963     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1964     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1965     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1966     if (header[3] < 0) {
1967       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1968     }
1969   }
1970 
1971   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1972   M = header[1]; N = header[2];
1973   /* determine ownership of all rows */
1974   m = M/size + ((M % size) > rank);
1975   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1976   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1977   rowners[0] = 0;
1978   for (i=2; i<=size; i++) {
1979     rowners[i] += rowners[i-1];
1980   }
1981   rstart = rowners[rank];
1982   rend   = rowners[rank+1];
1983 
1984   /* distribute row lengths to all processors */
1985   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1986   offlens = ourlens + (rend-rstart);
1987   if (!rank) {
1988     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1989     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1990     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1991     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1992     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1993     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1994   } else {
1995     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1996   }
1997 
1998   if (!rank) {
1999     /* calculate the number of nonzeros on each processor */
2000     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2001     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2002     for (i=0; i<size; i++) {
2003       for (j=rowners[i]; j< rowners[i+1]; j++) {
2004         procsnz[i] += rowlengths[j];
2005       }
2006     }
2007     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2008 
2009     /* determine max buffer needed and allocate it */
2010     maxnz = 0;
2011     for (i=0; i<size; i++) {
2012       maxnz = PetscMax(maxnz,procsnz[i]);
2013     }
2014     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2015 
2016     /* read in my part of the matrix column indices  */
2017     nz   = procsnz[0];
2018     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2019     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2020 
2021     /* read in everyone else's parts and ship them off */
2022     for (i=1; i<size; i++) {
2023       nz   = procsnz[i];
2024       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2025       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2026     }
2027     ierr = PetscFree(cols);CHKERRQ(ierr);
2028   } else {
2029     /* determine buffer space needed for message */
2030     nz = 0;
2031     for (i=0; i<m; i++) {
2032       nz += ourlens[i];
2033     }
2034     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2035 
2036     /* receive message of column indices*/
2037     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2038     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2039     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2040   }
2041 
2042   /* determine column ownership if matrix is not square */
2043   if (N != M) {
2044     n      = N/size + ((N % size) > rank);
2045     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2046     cstart = cend - n;
2047   } else {
2048     cstart = rstart;
2049     cend   = rend;
2050     n      = cend - cstart;
2051   }
2052 
2053   /* loop over local rows, determining number of off-diagonal entries */
2054   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2055   jj = 0;
2056   for (i=0; i<m; i++) {
2057     for (j=0; j<ourlens[i]; j++) {
2058       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2059       jj++;
2060     }
2061   }
2062 
2063   /* create our matrix */
2064   for (i=0; i<m; i++) {
2065     ourlens[i] -= offlens[i];
2066   }
2067   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2068   ierr = MatSetType(A,type);CHKERRQ(ierr);
2069   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2070 
2071   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2072   for (i=0; i<m; i++) {
2073     ourlens[i] += offlens[i];
2074   }
2075 
2076   if (!rank) {
2077     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2078 
2079     /* read in my part of the matrix numerical values  */
2080     nz   = procsnz[0];
2081     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2082 
2083     /* insert into matrix */
2084     jj      = rstart;
2085     smycols = mycols;
2086     svals   = vals;
2087     for (i=0; i<m; i++) {
2088       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2089       smycols += ourlens[i];
2090       svals   += ourlens[i];
2091       jj++;
2092     }
2093 
2094     /* read in other processors and ship out */
2095     for (i=1; i<size; i++) {
2096       nz   = procsnz[i];
2097       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2098       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2099     }
2100     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2101   } else {
2102     /* receive numeric values */
2103     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2104 
2105     /* receive message of values */
2106     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2107     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2108     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2109 
2110     /* insert into matrix */
2111     jj      = rstart;
2112     smycols = mycols;
2113     svals   = vals;
2114     for (i=0; i<m; i++) {
2115       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2116       smycols += ourlens[i];
2117       svals   += ourlens[i];
2118       jj++;
2119     }
2120   }
2121   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2122   ierr = PetscFree(vals);CHKERRQ(ierr);
2123   ierr = PetscFree(mycols);CHKERRQ(ierr);
2124   ierr = PetscFree(rowners);CHKERRQ(ierr);
2125 
2126   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2127   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2128   *newmat = A;
2129   PetscFunctionReturn(0);
2130 }
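
/*
   Illustrative sketch, assuming a file "matrix.dat" previously written with
   MatView() on a binary viewer (file name and the read-mode constant are
   assumptions): the loader above is normally reached through MatLoad() with an
   MPIAIJ type request.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/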
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2134 /*
2135     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2136   of the local rows, and then the end result formed by concatenating the local
2137   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2138 */
2139 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2140 {
2141   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2142   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2143   Mat          *local,M,Mreuse;
2144   PetscScalar  *vwork,*aa;
2145   MPI_Comm     comm = mat->comm;
2146   Mat_SeqAIJ   *aij;
2147 
2149   PetscFunctionBegin;
2150   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2151   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2152 
2153   if (call == MAT_REUSE_MATRIX) {
2154     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2155     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2156     local = &Mreuse;
2157     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2158   } else {
2159     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2160     Mreuse = *local;
2161     ierr   = PetscFree(local);CHKERRQ(ierr);
2162   }
2163 
2164   /*
2165       m - number of local rows
2166       n - number of columns (same on all processors)
2167       rstart - first row in new global matrix generated
2168   */
2169   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2170   if (call == MAT_INITIAL_MATRIX) {
2171     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2172     ii  = aij->i;
2173     jj  = aij->j;
2174 
2175     /*
2176         Determine the number of non-zeros in the diagonal and off-diagonal
2177         portions of the matrix in order to do correct preallocation
2178     */
2179 
2180     /* first get start and end of "diagonal" columns */
2181     if (csize == PETSC_DECIDE) {
2182       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2183       if (mglobal == n) { /* square matrix */
2184         nlocal = m;
2185       } else {
2186         nlocal = n/size + ((n % size) > rank);
2187       }
2188     } else {
2189       nlocal = csize;
2190     }
2191     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2192     rstart = rend - nlocal;
2193     if (rank == size - 1 && rend != n) {
2194       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2195     }
2196 
2197     /* next, compute all the lengths */
2198     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2199     olens = dlens + m;
2200     for (i=0; i<m; i++) {
2201       jend = ii[i+1] - ii[i];
2202       olen = 0;
2203       dlen = 0;
2204       for (j=0; j<jend; j++) {
2205         if (*jj < rstart || *jj >= rend) olen++;
2206         else dlen++;
2207         jj++;
2208       }
2209       olens[i] = olen;
2210       dlens[i] = dlen;
2211     }
2212     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2213     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2214     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2215     ierr = PetscFree(dlens);CHKERRQ(ierr);
2216   } else {
2217     int ml,nl;
2218 
2219     M = *newmat;
2220     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2221     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2222     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2223     /*
2224          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2225        rather than the slower MatSetValues().
2226     */
2227     M->was_assembled = PETSC_TRUE;
2228     M->assembled     = PETSC_FALSE;
2229   }
2230   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2231   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2232   ii  = aij->i;
2233   jj  = aij->j;
2234   aa  = aij->a;
2235   for (i=0; i<m; i++) {
2236     row   = rstart + i;
2237     nz    = ii[i+1] - ii[i];
2238     cwork = jj;     jj += nz;
2239     vwork = aa;     aa += nz;
2240     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2241   }
2242 
2243   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2244   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2245   *newmat = M;
2246 
2247   /* save submatrix used in processor for next request */
2248   if (call == MAT_INITIAL_MATRIX) {
2249     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2250     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2251   }
2252 
2253   PetscFunctionReturn(0);
2254 }
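
/*
   Illustrative sketch, with hypothetical index sets isrow and iscol: the routine
   above is reached through MatGetSubMatrix(), and a second call may reuse the
   intermediate sequential submatrix cached under the name "SubMatrix".

     Mat sub;
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       ... change the numerical values of mat, keeping the same layout ...
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/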
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2258 /*@C
2259    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2260    (the default parallel PETSc format).  For good matrix assembly performance
2261    the user should preallocate the matrix storage by setting the parameters
2262    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2263    performance can be increased by more than a factor of 50.
2264 
2265    Collective on MPI_Comm
2266 
2267    Input Parameters:
2268 +  A - the matrix
2269 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2270            (same value is used for all local rows)
2271 .  d_nnz - array containing the number of nonzeros in the various rows of the
2272            DIAGONAL portion of the local submatrix (possibly different for each row)
2273            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2274            The size of this array is equal to the number of local rows, i.e 'm'.
2275            You must leave room for the diagonal entry even if it is zero.
2276 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2277            submatrix (same value is used for all local rows).
2278 -  o_nnz - array containing the number of nonzeros in the various rows of the
2279            OFF-DIAGONAL portion of the local submatrix (possibly different for
2280            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2281            structure. The size of this array is equal to the number
2282            of local rows, i.e 'm'.
2283 
2284    The AIJ format (also called the Yale sparse matrix format or
2285    compressed row storage) is fully compatible with standard Fortran 77
2286    storage.  That is, the stored row and column indices can begin at
2287    either one (as in Fortran) or zero.  See the users manual for details.
2288 
2289    The user MUST specify either the local or global matrix dimensions
2290    (possibly both).
2291 
2292    The parallel matrix is partitioned such that the first m0 rows belong to
2293    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2294    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2295 
2296    The DIAGONAL portion of the local submatrix of a processor can be defined
2297    as the submatrix which is obtained by extracting the part corresponding
2298    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2299    first row that belongs to the processor, and r2 is the last row belonging
2300    to this processor. This is a square mxm matrix. The remaining portion
2301    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2302 
2303    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2304 
2305    By default, this format uses inodes (identical nodes) when possible.
2306    We search for consecutive rows with the same nonzero structure, thereby
2307    reusing matrix information to achieve increased efficiency.
2308 
2309    Options Database Keys:
2310 +  -mat_aij_no_inode  - Do not use inodes
2311 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2312 -  -mat_aij_oneindex - Internally use indexing starting at 1
2313         rather than 0.  Note that when calling MatSetValues(),
2314         the user still MUST index entries starting at 0!
2315 
2316    Example usage:
2317 
2318    Consider the following 8x8 matrix with 34 non-zero values that is
2319    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2320    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2321    as follows:
2322 
2323 .vb
2324             1  2  0  |  0  3  0  |  0  4
2325     Proc0   0  5  6  |  7  0  0  |  8  0
2326             9  0 10  | 11  0  0  | 12  0
2327     -------------------------------------
2328            13  0 14  | 15 16 17  |  0  0
2329     Proc1   0 18  0  | 19 20 21  |  0  0
2330             0  0  0  | 22 23  0  | 24  0
2331     -------------------------------------
2332     Proc2  25 26 27  |  0  0 28  | 29  0
2333            30  0  0  | 31 32 33  |  0 34
2334 .ve
2335 
2336    This can be represented as a collection of submatrices as:
2337 
2338 .vb
2339       A B C
2340       D E F
2341       G H I
2342 .ve
2343 
2344    where the submatrices A,B,C are owned by proc0, D,E,F are
2345    owned by proc1, G,H,I are owned by proc2.
2346 
2347    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2348    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2349    The 'M','N' parameters are 8,8, and have the same values on all procs.
2350 
2351    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2352    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2353    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2354    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2355    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2356    matrix, and [DF] as another SeqAIJ matrix.
2357 
2358    When d_nz, o_nz parameters are specified, d_nz storage elements are
2359    allocated for every row of the local diagonal submatrix, and o_nz
2360    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2361    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2362    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2363    In this case, the values of d_nz,o_nz are:
2364 .vb
2365      proc0 : d_nz = 2, o_nz = 2
2366      proc1 : d_nz = 3, o_nz = 2
2367      proc2 : d_nz = 1, o_nz = 4
2368 .ve
2369    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2370    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2371    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2372    34 values.
2373 
2374    When d_nnz, o_nnz parameters are specified, the storage is specified
2375    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2376    In the above case the values for d_nnz,o_nnz are:
2377 .vb
2378      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2379      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2380      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2381 .ve
2382    Here the space allocated is the sum of all the above values, i.e. 34, and
2383    hence pre-allocation is perfect.
2384 
2385    Level: intermediate
2386 
2387 .keywords: matrix, aij, compressed row, sparse, parallel
2388 
2389 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2390 @*/
2391 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2392 {
2393   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2394 
2395   PetscFunctionBegin;
2396   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2397   if (f) {
2398     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2399   }
2400   PetscFunctionReturn(0);
2401 }
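
/*
   Illustrative sketch of exact per-row preallocation for the 8x8 example in the
   manual page above, written from the point of view of proc0 (which owns rows
   0-2); the variable names are hypothetical.

     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/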
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatCreateMPIAIJ"
2405 /*@C
2406    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2407    (the default parallel PETSc format).  For good matrix assembly performance
2408    the user should preallocate the matrix storage by setting the parameters
2409    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2410    performance can be increased by more than a factor of 50.
2411 
2412    Collective on MPI_Comm
2413 
2414    Input Parameters:
2415 +  comm - MPI communicator
2416 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2417            This value should be the same as the local size used in creating the
2418            y vector for the matrix-vector product y = Ax.
2419 .  n - This value should be the same as the local size used in creating the
2420        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2421        calculated if N is given) For square matrices n is almost always m.
2422 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2423 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2424 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2425            (same value is used for all local rows)
2426 .  d_nnz - array containing the number of nonzeros in the various rows of the
2427            DIAGONAL portion of the local submatrix (possibly different for each row)
2428            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2429            The size of this array is equal to the number of local rows, i.e 'm'.
2430            You must leave room for the diagonal entry even if it is zero.
2431 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2432            submatrix (same value is used for all local rows).
2433 -  o_nnz - array containing the number of nonzeros in the various rows of the
2434            OFF-DIAGONAL portion of the local submatrix (possibly different for
2435            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2436            structure. The size of this array is equal to the number
2437            of local rows, i.e 'm'.
2438 
2439    Output Parameter:
2440 .  A - the matrix
2441 
2442    Notes:
2443    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2444    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2445    storage requirements for this matrix.
2446 
2447    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2448    processor then it must be used on all processors that share the object for
2449    that argument.
2450 
2451    The AIJ format (also called the Yale sparse matrix format or
2452    compressed row storage) is fully compatible with standard Fortran 77
2453    storage.  That is, the stored row and column indices can begin at
2454    either one (as in Fortran) or zero.  See the users manual for details.
2455 
2456    The user MUST specify either the local or global matrix dimensions
2457    (possibly both).
2458 
2459    The parallel matrix is partitioned such that the first m0 rows belong to
2460    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2461    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2462 
2463    The DIAGONAL portion of the local submatrix of a processor can be defined
2464    as the submatrix which is obtained by extracting the part corresponding
2465    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2466    first row that belongs to the processor, and r2 is the last row belonging
2467    to this processor. This is a square mxm matrix. The remaining portion
2468    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2469 
2470    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2471 
2472    When calling this routine with a single process communicator, a matrix of
2473    type MATSEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
2474    type of communicator, use the construction mechanism:
2475      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2476 
2477    By default, this format uses inodes (identical nodes) when possible.
2478    We search for consecutive rows with the same nonzero structure, thereby
2479    reusing matrix information to achieve increased efficiency.
2480 
2481    Options Database Keys:
2482 +  -mat_aij_no_inode  - Do not use inodes
2483 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2484 -  -mat_aij_oneindex - Internally use indexing starting at 1
2485         rather than 0.  Note that when calling MatSetValues(),
2486         the user still MUST index entries starting at 0!
2487 
2488 
2489    Example usage:
2490 
2491    Consider the following 8x8 matrix with 34 non-zero values that is
2492    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2493    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2494    as follows:
2495 
2496 .vb
2497             1  2  0  |  0  3  0  |  0  4
2498     Proc0   0  5  6  |  7  0  0  |  8  0
2499             9  0 10  | 11  0  0  | 12  0
2500     -------------------------------------
2501            13  0 14  | 15 16 17  |  0  0
2502     Proc1   0 18  0  | 19 20 21  |  0  0
2503             0  0  0  | 22 23  0  | 24  0
2504     -------------------------------------
2505     Proc2  25 26 27  |  0  0 28  | 29  0
2506            30  0  0  | 31 32 33  |  0 34
2507 .ve
2508 
2509    This can be represented as a collection of submatrices as:
2510 
2511 .vb
2512       A B C
2513       D E F
2514       G H I
2515 .ve
2516 
2517    where the submatrices A,B,C are owned by proc0, D,E,F are
2518    owned by proc1, G,H,I are owned by proc2.
2519 
2520    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2521    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2522    The 'M','N' parameters are 8,8, and have the same values on all procs.
2523 
2524    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2525    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2526    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2527    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2528    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2529    matrix, and [DF] as another SeqAIJ matrix.
2530 
2531    When d_nz, o_nz parameters are specified, d_nz storage elements are
2532    allocated for every row of the local diagonal submatrix, and o_nz
2533    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2534    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2535    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2536    In this case, the values of d_nz,o_nz are:
2537 .vb
2538      proc0 : d_nz = 2, o_nz = 2
2539      proc1 : d_nz = 3, o_nz = 2
2540      proc2 : d_nz = 1, o_nz = 4
2541 .ve
2542    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2543    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2544    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2545    34 values.
2546 
2547    When d_nnz, o_nnz parameters are specified, the storage is specified
2548    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2549    In the above case the values for d_nnz,o_nnz are:
2550 .vb
2551      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2552      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2553      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2554 .ve
2555    Here the space allocated is the sum of all the above values, i.e. 34, and
2556    hence pre-allocation is perfect.
2557 
2558    Level: intermediate
2559 
2560 .keywords: matrix, aij, compressed row, sparse, parallel
2561 
2562 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2563 @*/
2564 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2565 {
2566   int ierr,size;
2567 
2568   PetscFunctionBegin;
2569   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2570   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2571   if (size > 1) {
2572     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2573     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2574   } else {
2575     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2576     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2577   }
2578   PetscFunctionReturn(0);
2579 }
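
/*
   Illustrative sketch: the convenience routine above is equivalent to the
   MatCreate()/MatSetType()/preallocation sequence, choosing MATSEQAIJ or
   MATMPIAIJ by the communicator size.  The sizes m,n,M,N are hypothetical.

     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/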
2580 
2581 #undef __FUNCT__
2582 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2583 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2584 {
2585   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2586   PetscFunctionBegin;
2587   *Ad     = a->A;
2588   *Ao     = a->B;
2589   *colmap = a->garray;
2590   PetscFunctionReturn(0);
2591 }
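
/*
   Illustrative sketch of inspecting the two sequential blocks returned above;
   colmap[j] gives the GLOBAL column index of local column j of Ao.  The Mat A
   is hypothetical.

     Mat Ad,Ao;
     int *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/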
2592 
2593 #undef __FUNCT__
2594 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2595 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2596 {
2597   int        ierr,i;
2598   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2599 
2600   PetscFunctionBegin;
2601   if (coloring->ctype == IS_COLORING_LOCAL) {
2602     ISColoringValue *allcolors,*colors;
2603     ISColoring      ocoloring;
2604 
2605     /* set coloring for diagonal portion */
2606     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2607 
2608     /* set coloring for off-diagonal portion */
2609     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2610     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2611     for (i=0; i<a->B->n; i++) {
2612       colors[i] = allcolors[a->garray[i]];
2613     }
2614     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2615     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2616     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2617     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2618   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2619     ISColoringValue *colors;
2620     int             *larray;
2621     ISColoring      ocoloring;
2622 
2623     /* set coloring for diagonal portion */
2624     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2625     for (i=0; i<a->A->n; i++) {
2626       larray[i] = i + a->cstart;
2627     }
2628     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2629     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2630     for (i=0; i<a->A->n; i++) {
2631       colors[i] = coloring->colors[larray[i]];
2632     }
2633     ierr = PetscFree(larray);CHKERRQ(ierr);
2634     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2635     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2636     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2637 
2638     /* set coloring for off-diagonal portion */
2639     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2640     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2641     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2642     for (i=0; i<a->B->n; i++) {
2643       colors[i] = coloring->colors[larray[i]];
2644     }
2645     ierr = PetscFree(larray);CHKERRQ(ierr);
2646     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2647     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2648     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2649   } else {
2650     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2651   }
2652 
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 #undef __FUNCT__
2657 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2658 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2659 {
2660   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2661   int        ierr;
2662 
2663   PetscFunctionBegin;
2664   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2665   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2666   PetscFunctionReturn(0);
2667 }
2668 
2669 #undef __FUNCT__
2670 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2671 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2672 {
2673   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2674   int        ierr;
2675 
2676   PetscFunctionBegin;
2677   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2678   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 #undef __FUNCT__
2683 #define __FUNCT__ "MatMerge"
2684 /*@C
2685       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2686                  matrices from each processor
2687 
2688     Collective on MPI_Comm
2689 
2690    Input Parameters:
2691 +    comm - the communicator the parallel matrix will live on
2692 .    inmat - the input sequential matrix (one per process)
2693 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2694 
2695    Output Parameter:
2696 .    outmat - the parallel matrix generated
2697 
2698     Level: advanced
2699 
2700    Notes: The number of columns of the matrix on EACH of the separate processes
2701       MUST be the same.
2702 
2703 @*/
2704 int MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat)
2705 {
2706   int               ierr,m,n,i,rstart,nnz,I,*dnz,*onz;
2707   const int         *indx;
2708   const PetscScalar *values;
2709   PetscMap          columnmap,rowmap;
2710 
2711   PetscFunctionBegin;
2712 
2713   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2714   if (scall == MAT_INITIAL_MATRIX){
2715     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2716     ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2717     ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2718     ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2719     ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2720     ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2721 
2722     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2723     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2724     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2725     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2726     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2727 
2728     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2729     for (i=0;i<m;i++) {
2730       ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2731       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2732       ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2733     }
2734     /* This routine will ONLY return MPIAIJ type matrix */
2735     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2736     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2737     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2738     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2739 
2740   } else if (scall == MAT_REUSE_MATRIX){
2741     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2742   } else {
2743     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2744   }
2745 
2746   for (i=0;i<m;i++) {
2747     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2748     I    = i + rstart;
2749     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2750     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2751   }
2752   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2753   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2754   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2793   PetscFunctionReturn(0);
2794 }
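
/*
   Illustrative sketch, with a hypothetical sequential matrix "local" assembled
   on PETSC_COMM_SELF: MatMerge() stacks the per-process matrices by rows into
   one MPIAIJ matrix and destroys the input matrix.

     Mat global;
       ... assemble "local" as a SeqAIJ matrix on PETSC_COMM_SELF ...
     ierr = MatMerge(PETSC_COMM_WORLD,local,MAT_INITIAL_MATRIX,&global);CHKERRQ(ierr);
*/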
2795 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatFileSplit"
2798 int MatFileSplit(Mat A,char *outfile)
2799 {
2800   int               ierr,rank,len,m,N,i,rstart,nnz;
2801   const int         *indx;
2802   PetscViewer       out;
2803   char              *name;
2804   Mat               B;
2805   const PetscScalar *values;
2806 
2807   PetscFunctionBegin;
2808 
2809   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2810   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2811   /* Should this be the type of the diagonal block of A? */
2812   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2813   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2814   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2815   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2816   for (i=0;i<m;i++) {
2817     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2818     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2819     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2820   }
2821   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2822   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2823 
2824   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2825   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2826   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* leave room for ".<rank>" even with many digits */
2827   sprintf(name,"%s.%d",outfile,rank);
2828   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2829   ierr = PetscFree(name);CHKERRQ(ierr);
2830   ierr = MatView(B,out);CHKERRQ(ierr);
2831   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2832   ierr = MatDestroy(B);CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835