xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a30b231344809e2b903c9b58fd39085f5b865294)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 numbers to the local column numbers in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable
18 (at a slightly higher hash-table lookup cost); without it, it is not scalable
19 (each processor stores an order-N integer array), but access is fast.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
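/*
   Illustrative sketch (gcol and lcol are placeholder names): once the colmap
   created above exists, a global column number gcol is translated to the
   local off-diagonal column index lcol exactly as done in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below:

#if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
#else
     lcol = aij->colmap[gcol] - 1;
#endif

   A negative lcol means the global column does not occur in the off-diagonal part.
*/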
42 
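/*
   MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() insert a
   single value into the "diagonal" (A) respectively "off-diagonal" (B) SeqAIJ
   block.  They bisect the current row for the column, add to or overwrite the
   value if the column is already present, and otherwise shift the later
   entries of the row up to make room; if the row has no free slot the whole
   block's storage is reallocated with CHUNKSIZE extra entries for that row.
*/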
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
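/*
   MatSetValues_MPIAIJ() routes each (row,column) entry: rows owned by this
   process go into the local "diagonal" block aij->A (columns in [cstart,cend))
   or the "off-diagonal" block aij->B (all other columns, translated through
   the colmap); rows owned by other processes are stashed and communicated
   during MatAssemblyBegin()/MatAssemblyEnd().
*/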
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros above */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
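/*
   MatGetValues_MPIAIJ() only supports retrieving locally owned rows; columns
   inside the diagonal range are read from aij->A, all other columns are
   looked up in the colmap and read from aij->B (or returned as 0.0 if not stored).
*/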
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
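/*
   MatAssemblyBegin_MPIAIJ() verifies that all processes are using the same
   insert mode and starts scattering the stashed off-process entries to their
   owning processes.
*/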
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
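/*
   MatAssemblyEnd_MPIAIJ() receives the stashed entries and applies them with
   MatSetValues_MPIAIJ(), assembles the local blocks, disassembles if any
   process had to, and on the first final assembly builds the scatter used for
   matrix-vector products (MatSetUpMultiply_MPIAIJ()).
*/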
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   PetscScalar *val;
356   InsertMode  addv = mat->insertmode;
357 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
358   PetscTruth  flag;
359 #endif
360 
361   PetscFunctionBegin;
362   if (!aij->donotstash) {
363     while (1) {
364       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
365       if (!flg) break;
366 
367       for (i=0; i<n;) {
368         /* Now identify the consecutive vals belonging to the same row */
369         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
370         if (j < n) ncols = j-i;
371         else       ncols = n-i;
372         /* Now assemble all these values with a single function call */
373         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
374         i = j;
375       }
376     }
377     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
378   }
379 
380   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
381   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
382 
383   /* determine if any processor has disassembled; if so we must
384      also disassemble ourselves, so that we may reassemble. */
385   /*
386      if the nonzero structure of the submatrix B cannot change then we know
387      that no processor disassembled, thus we can skip this check
388   */
389   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
390     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
391     if (mat->was_assembled && !other_disassembled) {
392       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
393     }
394   }
395 
396   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
397     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
398   }
399   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
400   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
401 
402   if (aij->rowvalues) {
403     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
404     aij->rowvalues = 0;
405   }
406 
407   /* used by MatAXPY() */
408   { Mat_SeqAIJ *a = (Mat_SeqAIJ *)aij->A->data;
409     Mat_SeqAIJ *b = (Mat_SeqAIJ *)aij->B->data;
410     a->xtoy = 0; b->xtoy = 0;
411     a->XtoY = 0; b->XtoY = 0; }
412 
413 #if defined(PETSC_HAVE_SUPERLUDIST)
414   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
415   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
416 #endif
417 
418 #if defined(PETSC_HAVE_SPOOLES)
419   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
420   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
421 #endif
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
427 int MatZeroEntries_MPIAIJ(Mat A)
428 {
429   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
430   int        ierr;
431 
432   PetscFunctionBegin;
433   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
434   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
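/*
   MatZeroRows_MPIAIJ(): each process determines which process owns every row
   index it supplies and sends those indices to the owners (MPI_Irecv/MPI_Isend
   with counts obtained from PetscMaxSum()); each owner then zeros the received
   rows in its local blocks, optionally placing diag on the diagonal.
*/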
438 #undef __FUNCT__
439 #define __FUNCT__ "MatZeroRows_MPIAIJ"
440 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
441 {
442   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
443   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
444   int            *nprocs,j,idx,nsends,row;
445   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
446   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
447   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
448   MPI_Comm       comm = A->comm;
449   MPI_Request    *send_waits,*recv_waits;
450   MPI_Status     recv_status,*send_status;
451   IS             istmp;
452   PetscTruth     found;
453 
454   PetscFunctionBegin;
455   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
456   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
457 
458   /*  first count number of contributors to each processor */
459   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
460   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
461   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
462   for (i=0; i<N; i++) {
463     idx = rows[i];
464     found = PETSC_FALSE;
465     for (j=0; j<size; j++) {
466       if (idx >= owners[j] && idx < owners[j+1]) {
467         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
468       }
469     }
470     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
471   }
472   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
473 
474   /* inform other processors of number of messages and max length*/
475   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
476 
477   /* post receives:   */
478   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
479   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
480   for (i=0; i<nrecvs; i++) {
481     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
482   }
483 
484   /* do sends:
485       1) starts[i] gives the starting index in svalues for stuff going to
486          the ith processor
487   */
488   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
489   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
490   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
491   starts[0] = 0;
492   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
493   for (i=0; i<N; i++) {
494     svalues[starts[owner[i]]++] = rows[i];
495   }
496   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
497 
498   starts[0] = 0;
499   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
500   count = 0;
501   for (i=0; i<size; i++) {
502     if (nprocs[2*i+1]) {
503       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
504     }
505   }
506   ierr = PetscFree(starts);CHKERRQ(ierr);
507 
508   base = owners[rank];
509 
510   /*  wait on receives */
511   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
512   source = lens + nrecvs;
513   count  = nrecvs; slen = 0;
514   while (count) {
515     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
516     /* unpack receives into our local space */
517     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
518     source[imdex]  = recv_status.MPI_SOURCE;
519     lens[imdex]    = n;
520     slen          += n;
521     count--;
522   }
523   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
524 
525   /* move the data into the send scatter */
526   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
527   count = 0;
528   for (i=0; i<nrecvs; i++) {
529     values = rvalues + i*nmax;
530     for (j=0; j<lens[i]; j++) {
531       lrows[count++] = values[j] - base;
532     }
533   }
534   ierr = PetscFree(rvalues);CHKERRQ(ierr);
535   ierr = PetscFree(lens);CHKERRQ(ierr);
536   ierr = PetscFree(owner);CHKERRQ(ierr);
537   ierr = PetscFree(nprocs);CHKERRQ(ierr);
538 
539   /* actually zap the local rows */
540   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
541   PetscLogObjectParent(A,istmp);
542 
543   /*
544         Zero the required rows. If the "diagonal block" of the matrix
545      is square and the user wishes to set the diagonal, we use separate
546      code so that MatSetValues() is not called for each diagonal entry;
547      otherwise each call could allocate new memory, causing many mallocs and slowing things down.
548 
549        Contributed by: Matthew Knepley
550   */
551   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
552   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
553   if (diag && (l->A->M == l->A->N)) {
554     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
555   } else if (diag) {
556     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
557     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
558       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
559 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
560     }
561     for (i = 0; i < slen; i++) {
562       row  = lrows[i] + rstart;
563       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
564     }
565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
567   } else {
568     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
569   }
570   ierr = ISDestroy(istmp);CHKERRQ(ierr);
571   ierr = PetscFree(lrows);CHKERRQ(ierr);
572 
573   /* wait on sends */
574   if (nsends) {
575     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
576     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
577     ierr = PetscFree(send_status);CHKERRQ(ierr);
578   }
579   ierr = PetscFree(send_waits);CHKERRQ(ierr);
580   ierr = PetscFree(svalues);CHKERRQ(ierr);
581 
582   PetscFunctionReturn(0);
583 }
584 
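/*
   MatMult_MPIAIJ() computes yy = A*xx as yy = A_d*xx + A_o*x_ghost, where A_d
   is the local diagonal block and A_o the off-diagonal block; the scatter of
   the needed ghost values of xx into a->lvec is overlapped with the
   diagonal-block multiply.
*/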
585 #undef __FUNCT__
586 #define __FUNCT__ "MatMult_MPIAIJ"
587 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
588 {
589   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
590   int        ierr,nt;
591 
592   PetscFunctionBegin;
593   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
594   if (nt != A->n) {
595     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
596   }
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatMultAdd_MPIAIJ"
606 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609   int        ierr;
610 
611   PetscFunctionBegin;
612   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
613   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
614   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
615   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618 
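/*
   MatMultTranspose_MPIAIJ() forms the off-diagonal contribution A_o^T*xx in
   a->lvec and scatter-adds it back to the owning processes while the local
   product A_d^T*xx is computed into yy.
*/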
619 #undef __FUNCT__
620 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
621 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
622 {
623   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
624   int        ierr;
625 
626   PetscFunctionBegin;
627   /* do nondiagonal part */
628   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
629   /* send it on its way */
630   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   /* do local part */
632   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
633   /* receive remote parts: note this assumes the values are not actually */
634   /* inserted into yy until the VecScatterEnd() below, which is true for this */
635   /* implementation but is perhaps not always true. */
636   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
637   PetscFunctionReturn(0);
638 }
639 
640 #undef __FUNCT__
641 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
642 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
643 {
644   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
645   int        ierr;
646 
647   PetscFunctionBegin;
648   /* do nondiagonal part */
649   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
650   /* send it on its way */
651   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
652   /* do local part */
653   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
654   /* receive remote parts: note this assumes the values are not actually */
655   /* inserted into zz until the VecScatterEnd() below, which is true for this */
656   /* implementation but is perhaps not always true. */
657   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 
661 /*
662   This only works correctly for square matrices where the subblock A->A is the
663    diagonal block
664 */
665 #undef __FUNCT__
666 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
667 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
668 {
669   int        ierr;
670   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
671 
672   PetscFunctionBegin;
673   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
674   if (a->rstart != a->cstart || a->rend != a->cend) {
675     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
676   }
677   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatScale_MPIAIJ"
683 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
684 {
685   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
686   int        ierr;
687 
688   PetscFunctionBegin;
689   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
690   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatDestroy_MPIAIJ"
696 int MatDestroy_MPIAIJ(Mat mat)
697 {
698   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
699   int        ierr;
700 
701   PetscFunctionBegin;
702 #if defined(PETSC_USE_LOG)
703   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
704 #endif
705   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
706   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
707   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
708   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
709 #if defined (PETSC_USE_CTABLE)
710   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
711 #else
712   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
713 #endif
714   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
715   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
716   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
717   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
718   ierr = PetscFree(aij);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
723 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
724 
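/*
   MatView_MPIAIJ_Binary() writes the matrix in the PETSc binary format:
   process 0 writes the header (cookie, M, N, global nz), then the row
   lengths, global column indices, and values of every process, receiving the
   data of the other processes one at a time.
*/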
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPIAIJ_Binary"
727 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
728 {
729   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
730   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
731   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
732   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
733   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
734   PetscScalar       *column_values;
735 
736   PetscFunctionBegin;
737   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
738   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
739   nz   = A->nz + B->nz;
740   if (rank == 0) {
741     header[0] = MAT_FILE_COOKIE;
742     header[1] = mat->M;
743     header[2] = mat->N;
744     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
745     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
746     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
747     /* get largest number of rows any processor has */
748     rlen = mat->m;
749     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
750     for (i=1; i<size; i++) {
751       rlen = PetscMax(rlen,range[i+1] - range[i]);
752     }
753   } else {
754     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
755     rlen = mat->m;
756   }
757 
758   /* load up the local row counts */
759   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
760   for (i=0; i<mat->m; i++) {
761     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
762   }
763 
764   /* store the row lengths to the file */
765   if (rank == 0) {
766     MPI_Status status;
767     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
768     for (i=1; i<size; i++) {
769       rlen = range[i+1] - range[i];
770       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
771       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
772     }
773   } else {
774     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
775   }
776   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
777 
778   /* load up the local column indices */
779   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
780   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
781   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
782   cnt  = 0;
783   for (i=0; i<mat->m; i++) {
784     for (j=B->i[i]; j<B->i[i+1]; j++) {
785       if ( (col = garray[B->j[j]]) > cstart) break;
786       column_indices[cnt++] = col;
787     }
788     for (k=A->i[i]; k<A->i[i+1]; k++) {
789       column_indices[cnt++] = A->j[k] + cstart;
790     }
791     for (; j<B->i[i+1]; j++) {
792       column_indices[cnt++] = garray[B->j[j]];
793     }
794   }
795   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
796 
797   /* store the column indices to the file */
798   if (rank == 0) {
799     MPI_Status status;
800     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
801     for (i=1; i<size; i++) {
802       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
803       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
804       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
805       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
806     }
807   } else {
808     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
809     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
810   }
811   ierr = PetscFree(column_indices);CHKERRQ(ierr);
812 
813   /* load up the local column values */
814   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
815   cnt  = 0;
816   for (i=0; i<mat->m; i++) {
817     for (j=B->i[i]; j<B->i[i+1]; j++) {
818       if ( garray[B->j[j]] > cstart) break;
819       column_values[cnt++] = B->a[j];
820     }
821     for (k=A->i[i]; k<A->i[i+1]; k++) {
822       column_values[cnt++] = A->a[k];
823     }
824     for (; j<B->i[i+1]; j++) {
825       column_values[cnt++] = B->a[j];
826     }
827   }
828   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
829 
830   /* store the column values to the file */
831   if (rank == 0) {
832     MPI_Status status;
833     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
834     for (i=1; i<size; i++) {
835       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
836       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
837       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
838       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
839     }
840   } else {
841     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
842     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
843   }
844   ierr = PetscFree(column_values);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
850 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
851 {
852   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
853   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
854   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
855   PetscTruth        isdraw,isascii,flg,isbinary;
856   PetscViewer       sviewer;
857   PetscViewerFormat format;
858 
859   PetscFunctionBegin;
860   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
861   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
862   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
863   if (isascii) {
864     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
865     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
866       MatInfo info;
867       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
868       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
869       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
870       if (flg) {
871         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
872 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
873       } else {
874         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
875 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
876       }
877       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
878       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
879       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
880       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
881       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
882       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
883       PetscFunctionReturn(0);
884     } else if (format == PETSC_VIEWER_ASCII_INFO) {
885       PetscFunctionReturn(0);
886     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
887 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
888       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
889 #endif
890 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
891       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
892 #endif
893 
894       PetscFunctionReturn(0);
895     }
896   } else if (isbinary) {
897     if (size == 1) {
898       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
899       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
900     } else {
901       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
902     }
903     PetscFunctionReturn(0);
904   } else if (isdraw) {
905     PetscDraw  draw;
906     PetscTruth isnull;
907     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
908     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
909   }
910 
911   if (size == 1) {
912     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
913     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
914   } else {
915     /* assemble the entire matrix onto first processor. */
916     Mat         A;
917     Mat_SeqAIJ *Aloc;
918     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
919     PetscScalar *a;
920 
921     if (!rank) {
922       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
923     } else {
924       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
925     }
926     PetscLogObjectParent(mat,A);
927 
928     /* copy over the A part */
929     Aloc = (Mat_SeqAIJ*)aij->A->data;
930     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
931     row = aij->rstart;
932     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
933     for (i=0; i<m; i++) {
934       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
935       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
936     }
937     aj = Aloc->j;
938     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
939 
940     /* copy over the B part */
941     Aloc = (Mat_SeqAIJ*)aij->B->data;
942     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943     row  = aij->rstart;
944     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
945     ct   = cols;
946     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
947     for (i=0; i<m; i++) {
948       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
949       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
950     }
951     ierr = PetscFree(ct);CHKERRQ(ierr);
952     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
953     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
954     /*
955        Everyone has to participate in drawing the matrix since the graphics
956        waits are synchronized across all processors that share the PetscDraw object
957     */
958     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
959     if (!rank) {
960       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
961       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
962     }
963     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
964     ierr = MatDestroy(A);CHKERRQ(ierr);
965   }
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatView_MPIAIJ"
971 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
972 {
973   int        ierr;
974   PetscTruth isascii,isdraw,issocket,isbinary;
975 
976   PetscFunctionBegin;
977   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
978   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
979   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
980   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
981   if (isascii || isdraw || isbinary || issocket) {
982     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
983   } else {
984     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
985   }
986   PetscFunctionReturn(0);
987 }
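/*
   MatRelax_MPIAIJ() performs "local" SOR/relaxation sweeps: for each outer
   iteration the current xx is scattered into the ghost vector, the right-hand
   side is updated as bb1 = bb - B*x_ghost (by negating lvec and using the
   off-diagonal multadd), and the sequential relaxation is applied to the
   diagonal block; true parallel SOR is not supported.
*/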
988 
989 
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatRelax_MPIAIJ"
993 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
994 {
995   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
996   int          ierr;
997   Vec          bb1;
998   PetscScalar  mone=-1.0;
999 
1000   PetscFunctionBegin;
1001   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1002 
1003   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1004 
1005   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1006     if (flag & SOR_ZERO_INITIAL_GUESS) {
1007       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1008       its--;
1009     }
1010 
1011     while (its--) {
1012       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1013       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1014 
1015       /* update rhs: bb1 = bb - B*x */
1016       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1017       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1018 
1019       /* local sweep */
1020       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1021       CHKERRQ(ierr);
1022     }
1023   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1024     if (flag & SOR_ZERO_INITIAL_GUESS) {
1025       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1026       its--;
1027     }
1028     while (its--) {
1029       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1030       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1031 
1032       /* update rhs: bb1 = bb - B*x */
1033       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1034       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1035 
1036       /* local sweep */
1037       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1038       CHKERRQ(ierr);
1039     }
1040   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1041     if (flag & SOR_ZERO_INITIAL_GUESS) {
1042       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1043       its--;
1044     }
1045     while (its--) {
1046       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1047       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1048 
1049       /* update rhs: bb1 = bb - B*x */
1050       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1051       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1052 
1053       /* local sweep */
1054       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1055       CHKERRQ(ierr);
1056     }
1057   } else {
1058     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1059   }
1060 
1061   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1067 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1068 {
1069   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1070   Mat        A = mat->A,B = mat->B;
1071   int        ierr;
1072   PetscReal  isend[5],irecv[5];
1073 
1074   PetscFunctionBegin;
1075   info->block_size     = 1.0;
1076   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1077   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1078   isend[3] = info->memory;  isend[4] = info->mallocs;
1079   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1080   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1081   isend[3] += info->memory;  isend[4] += info->mallocs;
1082   if (flag == MAT_LOCAL) {
1083     info->nz_used      = isend[0];
1084     info->nz_allocated = isend[1];
1085     info->nz_unneeded  = isend[2];
1086     info->memory       = isend[3];
1087     info->mallocs      = isend[4];
1088   } else if (flag == MAT_GLOBAL_MAX) {
1089     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1090     info->nz_used      = irecv[0];
1091     info->nz_allocated = irecv[1];
1092     info->nz_unneeded  = irecv[2];
1093     info->memory       = irecv[3];
1094     info->mallocs      = irecv[4];
1095   } else if (flag == MAT_GLOBAL_SUM) {
1096     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1097     info->nz_used      = irecv[0];
1098     info->nz_allocated = irecv[1];
1099     info->nz_unneeded  = irecv[2];
1100     info->memory       = irecv[3];
1101     info->mallocs      = irecv[4];
1102   }
1103   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1104   info->fill_ratio_needed = 0;
1105   info->factor_mallocs    = 0;
1106   info->rows_global       = (double)matin->M;
1107   info->columns_global    = (double)matin->N;
1108   info->rows_local        = (double)matin->m;
1109   info->columns_local     = (double)matin->N;
1110 
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatSetOption_MPIAIJ"
1116 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1117 {
1118   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1119   int        ierr;
1120 
1121   PetscFunctionBegin;
1122   switch (op) {
1123   case MAT_NO_NEW_NONZERO_LOCATIONS:
1124   case MAT_YES_NEW_NONZERO_LOCATIONS:
1125   case MAT_COLUMNS_UNSORTED:
1126   case MAT_COLUMNS_SORTED:
1127   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1128   case MAT_KEEP_ZEROED_ROWS:
1129   case MAT_NEW_NONZERO_LOCATION_ERR:
1130   case MAT_USE_INODES:
1131   case MAT_DO_NOT_USE_INODES:
1132   case MAT_IGNORE_ZERO_ENTRIES:
1133     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1134     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1135     break;
1136   case MAT_ROW_ORIENTED:
1137     a->roworiented = PETSC_TRUE;
1138     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1139     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1140     break;
1141   case MAT_ROWS_SORTED:
1142   case MAT_ROWS_UNSORTED:
1143   case MAT_YES_NEW_DIAGONALS:
1144     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1145     break;
1146   case MAT_COLUMN_ORIENTED:
1147     a->roworiented = PETSC_FALSE;
1148     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1149     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1150     break;
1151   case MAT_IGNORE_OFF_PROC_ENTRIES:
1152     a->donotstash = PETSC_TRUE;
1153     break;
1154   case MAT_NO_NEW_DIAGONALS:
1155     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1156   default:
1157     SETERRQ(PETSC_ERR_SUP,"unknown option");
1158   }
1159   PetscFunctionReturn(0);
1160 }
1161 
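/*
   MatGetRow_MPIAIJ() returns a locally owned row with global column numbers:
   the entries of the diagonal and off-diagonal blocks are merged, in
   increasing column order, into the rowvalues/rowindices work arrays (sized
   for the longest local row on first use).
*/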
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatGetRow_MPIAIJ"
1164 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1165 {
1166   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1167   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1168   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1169   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1170   int          *cmap,*idx_p;
1171 
1172   PetscFunctionBegin;
1173   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1174   mat->getrowactive = PETSC_TRUE;
1175 
1176   if (!mat->rowvalues && (idx || v)) {
1177     /*
1178         allocate enough space to hold information from the longest row.
1179     */
1180     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1181     int     max = 1,tmp;
1182     for (i=0; i<matin->m; i++) {
1183       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1184       if (max < tmp) { max = tmp; }
1185     }
1186     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1187     mat->rowindices = (int*)(mat->rowvalues + max);
1188   }
1189 
1190   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1191   lrow = row - rstart;
1192 
1193   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1194   if (!v)   {pvA = 0; pvB = 0;}
1195   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1196   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1197   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1198   nztot = nzA + nzB;
1199 
1200   cmap  = mat->garray;
1201   if (v  || idx) {
1202     if (nztot) {
1203       /* Sort by increasing column numbers, assuming A and B already sorted */
1204       int imark = -1;
1205       if (v) {
1206         *v = v_p = mat->rowvalues;
1207         for (i=0; i<nzB; i++) {
1208           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1209           else break;
1210         }
1211         imark = i;
1212         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1213         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1214       }
1215       if (idx) {
1216         *idx = idx_p = mat->rowindices;
1217         if (imark > -1) {
1218           for (i=0; i<imark; i++) {
1219             idx_p[i] = cmap[cworkB[i]];
1220           }
1221         } else {
1222           for (i=0; i<nzB; i++) {
1223             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1224             else break;
1225           }
1226           imark = i;
1227         }
1228         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1229         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1230       }
1231     } else {
1232       if (idx) *idx = 0;
1233       if (v)   *v   = 0;
1234     }
1235   }
1236   *nz = nztot;
1237   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1238   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1244 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1245 {
1246   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1247 
1248   PetscFunctionBegin;
1249   if (aij->getrowactive == PETSC_FALSE) {
1250     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1251   }
1252   aij->getrowactive = PETSC_FALSE;
1253   PetscFunctionReturn(0);
1254 }
1255 
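/*
   MatNorm_MPIAIJ(): the Frobenius norm sums the squares of the local entries
   and reduces with MPI_SUM; the 1-norm accumulates per-global-column sums and
   takes the maximum after an MPI_SUM reduction; the infinity norm takes the
   maximum local row sum and reduces with MPI_MAX.  The 2-norm is not supported.
*/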
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatNorm_MPIAIJ"
1258 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1259 {
1260   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1261   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1262   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1263   PetscReal    sum = 0.0;
1264   PetscScalar  *v;
1265 
1266   PetscFunctionBegin;
1267   if (aij->size == 1) {
1268     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1269   } else {
1270     if (type == NORM_FROBENIUS) {
1271       v = amat->a;
1272       for (i=0; i<amat->nz; i++) {
1273 #if defined(PETSC_USE_COMPLEX)
1274         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1275 #else
1276         sum += (*v)*(*v); v++;
1277 #endif
1278       }
1279       v = bmat->a;
1280       for (i=0; i<bmat->nz; i++) {
1281 #if defined(PETSC_USE_COMPLEX)
1282         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1283 #else
1284         sum += (*v)*(*v); v++;
1285 #endif
1286       }
1287       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1288       *norm = sqrt(*norm);
1289     } else if (type == NORM_1) { /* max column norm */
1290       PetscReal *tmp,*tmp2;
1291       int    *jj,*garray = aij->garray;
1292       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1293       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1294       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1295       *norm = 0.0;
1296       v = amat->a; jj = amat->j;
1297       for (j=0; j<amat->nz; j++) {
1298         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1299       }
1300       v = bmat->a; jj = bmat->j;
1301       for (j=0; j<bmat->nz; j++) {
1302         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1303       }
1304       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1305       for (j=0; j<mat->N; j++) {
1306         if (tmp2[j] > *norm) *norm = tmp2[j];
1307       }
1308       ierr = PetscFree(tmp);CHKERRQ(ierr);
1309       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1310     } else if (type == NORM_INFINITY) { /* max row norm */
1311       PetscReal ntemp = 0.0;
1312       for (j=0; j<aij->A->m; j++) {
1313         v = amat->a + amat->i[j] + shift;
1314         sum = 0.0;
1315         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1316           sum += PetscAbsScalar(*v); v++;
1317         }
1318         v = bmat->a + bmat->i[j] + shift;
1319         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1320           sum += PetscAbsScalar(*v); v++;
1321         }
1322         if (sum > ntemp) ntemp = sum;
1323       }
1324       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1325     } else {
1326       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1327     }
1328   }
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatTranspose_MPIAIJ"
1334 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1335 {
1336   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1337   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1338   int          ierr,shift = Aloc->indexshift;
1339   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1340   Mat          B;
1341   PetscScalar  *array;
1342 
1343   PetscFunctionBegin;
1344   if (!matout && M != N) {
1345     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1346   }
1347 
1348   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1349 
1350   /* copy over the A part */
1351   Aloc = (Mat_SeqAIJ*)a->A->data;
1352   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1353   row = a->rstart;
1354   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1355   for (i=0; i<m; i++) {
1356     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1357     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1358   }
1359   aj = Aloc->j;
1360   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1361 
1362   /* copy over the B part */
1363   Aloc = (Mat_SeqAIJ*)a->B->data;
1364   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1365   row  = a->rstart;
1366   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1367   ct   = cols;
1368   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1369   for (i=0; i<m; i++) {
1370     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1371     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1372   }
1373   ierr = PetscFree(ct);CHKERRQ(ierr);
1374   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1376   if (matout) {
1377     *matout = B;
1378   } else {
1379     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1380   }
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1386 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1387 {
1388   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1389   Mat        a = aij->A,b = aij->B;
1390   int        ierr,s1,s2,s3;
1391 
1392   PetscFunctionBegin;
1393   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1394   if (rr) {
1395     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1396     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1397     /* Overlap communication with computation. */
1398     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1399   }
1400   if (ll) {
1401     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1402     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1403     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1404   }
1405   /* scale  the diagonal block */
1406   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1407 
1408   if (rr) {
1409     /* Do a scatter end and then right scale the off-diagonal block */
1410     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1411     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1412   }
1413 
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1420 int MatPrintHelp_MPIAIJ(Mat A)
1421 {
1422   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1423   int        ierr;
1424 
1425   PetscFunctionBegin;
1426   if (!a->rank) {
1427     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1434 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1435 {
1436   PetscFunctionBegin;
1437   *bs = 1;
1438   PetscFunctionReturn(0);
1439 }
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1442 int MatSetUnfactored_MPIAIJ(Mat A)
1443 {
1444   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1445   int        ierr;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatEqual_MPIAIJ"
1454 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1455 {
1456   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1457   Mat        a,b,c,d;
1458   PetscTruth flg;
1459   int        ierr;
1460 
1461   PetscFunctionBegin;
1462   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1463   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1464   a = matA->A; b = matA->B;
1465   c = matB->A; d = matB->B;
1466 
1467   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1468   if (flg == PETSC_TRUE) {
1469     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1470   }
1471   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatCopy_MPIAIJ"
1477 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1478 {
1479   int        ierr;
1480   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1481   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1482   PetscTruth flg;
1483 
1484   PetscFunctionBegin;
1485   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1486   if (str != SAME_NONZERO_PATTERN || !flg) {
1487     /* Because of the column compression in the off-processor part of the matrix a->B,
1488        the number of columns in a->B and b->B may be different; hence we cannot call
1489        MatCopy() directly on the two parts. If need be, we could provide a more
1490        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices and
1491        then copying the submatrices */
1492     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1493   } else {
1494     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1495     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1496   }
1497   PetscFunctionReturn(0);
1498 }
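
/*
   Illustrative sketch (an editorial addition, not part of the library source): one safe way to
   reach the fast branch above is to obtain B from A with MatDuplicate(), which guarantees an
   identical nonzero pattern in both the diagonal and off-diagonal blocks.  Variable names are
   hypothetical; MAT_COPY_VALUES and DIFFERENT_NONZERO_PATTERN are assumed to be the option
   names of this release.

     Mat A,B;
     int ierr;
     ...  assemble A  ...
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ...  change some values (but not the pattern) of A  ...
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       copies each block directly
     ierr = MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  falls back to MatCopy_Basic()
*/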
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1502 int MatSetUpPreallocation_MPIAIJ(Mat A)
1503 {
1504   int        ierr;
1505 
1506   PetscFunctionBegin;
1507   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1512 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1513 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1514 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1515 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1516 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1517 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1518 #endif
1519 
1520 #include "petscblaslapack.h"
1521 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatAXPY_MPIAIJ"
1524 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1525 {
1526   int        ierr,one=1;
1527   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1528   Mat_SeqAIJ *x,*y;
1529 
1530   PetscFunctionBegin;
1531   if (str == SAME_NONZERO_PATTERN) {
1532     x  = (Mat_SeqAIJ *)xx->A->data;
1533     y  = (Mat_SeqAIJ *)yy->A->data;
1534     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1535     x  = (Mat_SeqAIJ *)xx->B->data;
1536     y  = (Mat_SeqAIJ *)yy->B->data;
1537     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1538   } else if (str == SUBSET_NONZERO_PATTERN) {
1539     int *xtoy,*xi,*yi,row,i,xcol,ycol,nz,jx,jy;
1540 
1541     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1542 
1543     x  = (Mat_SeqAIJ *)xx->B->data;
1544     y  = (Mat_SeqAIJ *)yy->B->data;
1545     xi = x->i; yi = y->i;
1546 
1547     if (y->xtoy && y->XtoY != xx->B) {
1548       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1549       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1550     }
1551     if (!y->xtoy) { /* get xtoy */
1552       ierr = PetscMalloc(x->nz*sizeof(int),&xtoy);CHKERRQ(ierr);
1553       i = 0;
1554       for (row=0; row<xx->B->m; row++){
1555         nz = xi[1] - xi[0];
1556         jy = 0;
1557         for (jx=0; jx<nz; jx++,jy++){
1558           xcol = xx->garray[x->j[*xi + jx]];
1559           ycol = yy->garray[y->j[*yi + jy]];
1560           while ( ycol < xcol ) {
1561             jy++;
1562             ycol = yy->garray[y->j[*yi + jy]];
1563           }
1564           if (xcol != ycol) SETERRQ2(PETSC_ERR_ARG_WRONG,"X matrix entry (%d,%d) is not in Y matrix",row,xcol);
1565           xtoy[i++] = *yi + jy;
1566         }
1567         xi++; yi++;
1568       }
1569       y->xtoy = xtoy; /* attach xtoy to the denser matrix Y */
1570       y->XtoY = xx->B;
1571     }
1572 
1573     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1574   } else {
1575     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 /* -------------------------------------------------------------------*/
1581 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1582        MatGetRow_MPIAIJ,
1583        MatRestoreRow_MPIAIJ,
1584        MatMult_MPIAIJ,
1585        MatMultAdd_MPIAIJ,
1586        MatMultTranspose_MPIAIJ,
1587        MatMultTransposeAdd_MPIAIJ,
1588        0,
1589        0,
1590        0,
1591        0,
1592        0,
1593        0,
1594        MatRelax_MPIAIJ,
1595        MatTranspose_MPIAIJ,
1596        MatGetInfo_MPIAIJ,
1597        MatEqual_MPIAIJ,
1598        MatGetDiagonal_MPIAIJ,
1599        MatDiagonalScale_MPIAIJ,
1600        MatNorm_MPIAIJ,
1601        MatAssemblyBegin_MPIAIJ,
1602        MatAssemblyEnd_MPIAIJ,
1603        0,
1604        MatSetOption_MPIAIJ,
1605        MatZeroEntries_MPIAIJ,
1606        MatZeroRows_MPIAIJ,
1607 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1608        MatLUFactorSymbolic_MPIAIJ_TFS,
1609 #else
1610        0,
1611 #endif
1612        0,
1613        0,
1614        0,
1615        MatSetUpPreallocation_MPIAIJ,
1616        0,
1617        0,
1618        0,
1619        0,
1620        MatDuplicate_MPIAIJ,
1621        0,
1622        0,
1623        0,
1624        0,
1625        MatAXPY_MPIAIJ,
1626        MatGetSubMatrices_MPIAIJ,
1627        MatIncreaseOverlap_MPIAIJ,
1628        MatGetValues_MPIAIJ,
1629        MatCopy_MPIAIJ,
1630        MatPrintHelp_MPIAIJ,
1631        MatScale_MPIAIJ,
1632        0,
1633        0,
1634        0,
1635        MatGetBlockSize_MPIAIJ,
1636        0,
1637        0,
1638        0,
1639        0,
1640        MatFDColoringCreate_MPIAIJ,
1641        0,
1642        MatSetUnfactored_MPIAIJ,
1643        0,
1644        0,
1645        MatGetSubMatrix_MPIAIJ,
1646        MatDestroy_MPIAIJ,
1647        MatView_MPIAIJ,
1648        MatGetPetscMaps_Petsc,
1649        0,
1650        0,
1651        0,
1652        0,
1653        0,
1654        0,
1655        0,
1656        0,
1657        MatSetColoring_MPIAIJ,
1658        MatSetValuesAdic_MPIAIJ,
1659        MatSetValuesAdifor_MPIAIJ
1660 };
1661 
1662 /* ----------------------------------------------------------------------------------------*/
1663 
1664 EXTERN_C_BEGIN
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1667 int MatStoreValues_MPIAIJ(Mat mat)
1668 {
1669   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1670   int        ierr;
1671 
1672   PetscFunctionBegin;
1673   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1674   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1675   PetscFunctionReturn(0);
1676 }
1677 EXTERN_C_END
1678 
1679 EXTERN_C_BEGIN
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1682 int MatRetrieveValues_MPIAIJ(Mat mat)
1683 {
1684   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1685   int        ierr;
1686 
1687   PetscFunctionBegin;
1688   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1689   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1690   PetscFunctionReturn(0);
1691 }
1692 EXTERN_C_END
1693 
1694 #include "petscpc.h"
1695 EXTERN_C_BEGIN
1696 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1697 EXTERN_C_END
1698 
1699 EXTERN_C_BEGIN
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatCreate_MPIAIJ"
1702 int MatCreate_MPIAIJ(Mat B)
1703 {
1704   Mat_MPIAIJ *b;
1705   int        ierr,i,size;
1706 
1707   PetscFunctionBegin;
1708   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1709 
1710   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1711   B->data         = (void*)b;
1712   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1713   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1714   B->factor       = 0;
1715   B->assembled    = PETSC_FALSE;
1716   B->mapping      = 0;
1717 
1718   B->insertmode      = NOT_SET_VALUES;
1719   b->size            = size;
1720   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1721 
1722   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1723   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1724 
1725   /* the information in the maps duplicates the information computed below, eventually
1726      we should remove the duplicate information that is not contained in the maps */
1727   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1728   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1729 
1730   /* build local table of row and column ownerships */
1731   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1732   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1733   b->cowners = b->rowners + b->size + 2;
1734   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1735   b->rowners[0] = 0;
1736   for (i=2; i<=b->size; i++) {
1737     b->rowners[i] += b->rowners[i-1];
1738   }
1739   b->rstart = b->rowners[b->rank];
1740   b->rend   = b->rowners[b->rank+1];
1741   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1742   b->cowners[0] = 0;
1743   for (i=2; i<=b->size; i++) {
1744     b->cowners[i] += b->cowners[i-1];
1745   }
1746   b->cstart = b->cowners[b->rank];
1747   b->cend   = b->cowners[b->rank+1];
1748 
1749   /* build cache for off array entries formed */
1750   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1751   b->donotstash  = PETSC_FALSE;
1752   b->colmap      = 0;
1753   b->garray      = 0;
1754   b->roworiented = PETSC_TRUE;
1755 
1756   /* stuff used for matrix vector multiply */
1757   b->lvec      = PETSC_NULL;
1758   b->Mvctx     = PETSC_NULL;
1759 
1760   /* stuff for MatGetRow() */
1761   b->rowindices   = 0;
1762   b->rowvalues    = 0;
1763   b->getrowactive = PETSC_FALSE;
1764 
1765   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1766                                      "MatStoreValues_MPIAIJ",
1767                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1769                                      "MatRetrieveValues_MPIAIJ",
1770                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1772                                      "MatGetDiagonalBlock_MPIAIJ",
1773                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1774 
1775   PetscFunctionReturn(0);
1776 }
1777 EXTERN_C_END
1778 
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1781 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1782 {
1783   Mat        mat;
1784   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1785   int        ierr;
1786 
1787   PetscFunctionBegin;
1788   *newmat       = 0;
1789   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1790   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1791   a    = (Mat_MPIAIJ*)mat->data;
1792   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1793   mat->factor       = matin->factor;
1794   mat->assembled    = PETSC_TRUE;
1795   mat->insertmode   = NOT_SET_VALUES;
1796   mat->preallocated = PETSC_TRUE;
1797 
1798   a->rstart       = oldmat->rstart;
1799   a->rend         = oldmat->rend;
1800   a->cstart       = oldmat->cstart;
1801   a->cend         = oldmat->cend;
1802   a->size         = oldmat->size;
1803   a->rank         = oldmat->rank;
1804   a->donotstash   = oldmat->donotstash;
1805   a->roworiented  = oldmat->roworiented;
1806   a->rowindices   = 0;
1807   a->rowvalues    = 0;
1808   a->getrowactive = PETSC_FALSE;
1809 
1810   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1811   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1812   if (oldmat->colmap) {
1813 #if defined (PETSC_USE_CTABLE)
1814     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1815 #else
1816     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1817     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1818     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1819 #endif
1820   } else a->colmap = 0;
1821   if (oldmat->garray) {
1822     int len;
1823     len  = oldmat->B->n;
1824     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1825     PetscLogObjectMemory(mat,len*sizeof(int));
1826     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1827   } else a->garray = 0;
1828 
1829   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1830   PetscLogObjectParent(mat,a->lvec);
1831   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1832   PetscLogObjectParent(mat,a->Mvctx);
1833   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1834   PetscLogObjectParent(mat,a->A);
1835   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1836   PetscLogObjectParent(mat,a->B);
1837   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1838   *newmat = mat;
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 #include "petscsys.h"
1843 
1844 EXTERN_C_BEGIN
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatLoad_MPIAIJ"
1847 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1848 {
1849   Mat          A;
1850   PetscScalar  *vals,*svals;
1851   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1852   MPI_Status   status;
1853   int          i,nz,ierr,j,rstart,rend,fd;
1854   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1855   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1856   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1857 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST)
1858   PetscTruth   flag;
1859 #endif
1860 
1861   PetscFunctionBegin;
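  /* Added descriptive note: process 0 reads the binary header and the per-row lengths (scattered
     with MPI_Scatterv), then reads each process' column indices and numerical values and ships
     them with MPI_Send; the other processes post the matching receives below */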
1862   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1863   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1864   if (!rank) {
1865     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1866     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1867     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1868     if (header[3] < 0) {
1869       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1870     }
1871   }
1872 
1873   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1874   M = header[1]; N = header[2];
1875   /* determine ownership of all rows */
1876   m = M/size + ((M % size) > rank);
1877   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1878   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1879   rowners[0] = 0;
1880   for (i=2; i<=size; i++) {
1881     rowners[i] += rowners[i-1];
1882   }
1883   rstart = rowners[rank];
1884   rend   = rowners[rank+1];
1885 
1886   /* distribute row lengths to all processors */
1887   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1888   offlens = ourlens + (rend-rstart);
1889   if (!rank) {
1890     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1891     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1892     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1893     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1894     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1895     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1896   } else {
1897     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1898   }
1899 
1900   if (!rank) {
1901     /* calculate the number of nonzeros on each processor */
1902     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1903     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1904     for (i=0; i<size; i++) {
1905       for (j=rowners[i]; j< rowners[i+1]; j++) {
1906         procsnz[i] += rowlengths[j];
1907       }
1908     }
1909     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1910 
1911     /* determine max buffer needed and allocate it */
1912     maxnz = 0;
1913     for (i=0; i<size; i++) {
1914       maxnz = PetscMax(maxnz,procsnz[i]);
1915     }
1916     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1917 
1918     /* read in my part of the matrix column indices  */
1919     nz   = procsnz[0];
1920     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1921     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1922 
1923     /* read in everyone else's part and ship it off */
1924     for (i=1; i<size; i++) {
1925       nz   = procsnz[i];
1926       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1927       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1928     }
1929     ierr = PetscFree(cols);CHKERRQ(ierr);
1930   } else {
1931     /* determine buffer space needed for message */
1932     nz = 0;
1933     for (i=0; i<m; i++) {
1934       nz += ourlens[i];
1935     }
1936     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1937 
1938     /* receive message of column indices*/
1939     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1940     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1941     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1942   }
1943 
1944   /* determine column ownership if matrix is not square */
1945   if (N != M) {
1946     n      = N/size + ((N % size) > rank);
1947     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1948     cstart = cend - n;
1949   } else {
1950     cstart = rstart;
1951     cend   = rend;
1952     n      = cend - cstart;
1953   }
1954 
1955   /* loop over local rows, determining number of off diagonal entries */
1956   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1957   jj = 0;
1958   for (i=0; i<m; i++) {
1959     for (j=0; j<ourlens[i]; j++) {
1960       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1961       jj++;
1962     }
1963   }
1964 
1965   /* create our matrix */
1966   for (i=0; i<m; i++) {
1967     ourlens[i] -= offlens[i];
1968   }
1969   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1970   A = *newmat;
1971   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1972   for (i=0; i<m; i++) {
1973     ourlens[i] += offlens[i];
1974   }
1975 
1976   if (!rank) {
1977     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1978 
1979     /* read in my part of the matrix numerical values  */
1980     nz   = procsnz[0];
1981     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1982 
1983     /* insert into matrix */
1984     jj      = rstart;
1985     smycols = mycols;
1986     svals   = vals;
1987     for (i=0; i<m; i++) {
1988       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1989       smycols += ourlens[i];
1990       svals   += ourlens[i];
1991       jj++;
1992     }
1993 
1994     /* read in other processors and ship out */
1995     for (i=1; i<size; i++) {
1996       nz   = procsnz[i];
1997       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1998       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1999     }
2000     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2001   } else {
2002     /* receive numeric values */
2003     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2004 
2005     /* receive message of values*/
2006     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2007     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2008     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2009 
2010     /* insert into matrix */
2011     jj      = rstart;
2012     smycols = mycols;
2013     svals   = vals;
2014     for (i=0; i<m; i++) {
2015       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2016       smycols += ourlens[i];
2017       svals   += ourlens[i];
2018       jj++;
2019     }
2020   }
2021   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2022   ierr = PetscFree(vals);CHKERRQ(ierr);
2023   ierr = PetscFree(mycols);CHKERRQ(ierr);
2024   ierr = PetscFree(rowners);CHKERRQ(ierr);
2025 
2026   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2027   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2028 #if defined(PETSC_HAVE_SPOOLES)
2029   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2030   if (flag) {
2031     if (size == 1) {
2032       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2033     } else {
2034       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2035     }
2036   }
2037 #endif
2038 #if defined(PETSC_HAVE_SUPERLUDIST)
2039   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2040   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2041 #endif
2042   PetscFunctionReturn(0);
2043 }
2044 EXTERN_C_END
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2048 /*
2049     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix on
2050   each process, and then the final result formed by concatenating those local
2051   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2052 */
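
/*
   Illustrative sketch (an editorial addition, not part of the library source): typical use of
   this routine through the generic MatGetSubMatrix() interface (that interface and its argument
   order are assumed here).  The index sets isrow and iscol (creation not shown) list the global
   rows/columns wanted by this process; variable names are hypothetical.

     Mat sub;
     int ierr;

     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
        ... use sub; if the values of mat change but not its pattern ...
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(sub);CHKERRQ(ierr);
*/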
2053 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2054 {
2055   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2056   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2057   Mat          *local,M,Mreuse;
2058   PetscScalar  *vwork,*aa;
2059   MPI_Comm     comm = mat->comm;
2060   Mat_SeqAIJ   *aij;
2061 
2062 
2063   PetscFunctionBegin;
2064   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2065   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2066 
2067   if (call ==  MAT_REUSE_MATRIX) {
2068     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2069     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2070     local = &Mreuse;
2071     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2072   } else {
2073     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2074     Mreuse = *local;
2075     ierr   = PetscFree(local);CHKERRQ(ierr);
2076   }
2077 
2078   /*
2079       m - number of local rows
2080       n - number of columns (same on all processors)
2081       rstart - first row in new global matrix generated
2082   */
2083   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2084   if (call == MAT_INITIAL_MATRIX) {
2085     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2086     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2087     ii  = aij->i;
2088     jj  = aij->j;
2089 
2090     /*
2091         Determine the number of non-zeros in the diagonal and off-diagonal
2092         portions of the matrix in order to do correct preallocation
2093     */
2094 
2095     /* first get start and end of "diagonal" columns */
2096     if (csize == PETSC_DECIDE) {
2097       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2098       if (mglobal == n) { /* square matrix */
2099 	nlocal = m;
2100       } else {
2101         nlocal = n/size + ((n % size) > rank);
2102       }
2103     } else {
2104       nlocal = csize;
2105     }
2106     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2107     rstart = rend - nlocal;
2108     if (rank == size - 1 && rend != n) {
2109       SETERRQ(1,"Local column sizes do not add up to total number of columns");
2110     }
2111 
2112     /* next, compute all the lengths */
2113     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2114     olens = dlens + m;
2115     for (i=0; i<m; i++) {
2116       jend = ii[i+1] - ii[i];
2117       olen = 0;
2118       dlen = 0;
2119       for (j=0; j<jend; j++) {
2120         if (*jj < rstart || *jj >= rend) olen++;
2121         else dlen++;
2122         jj++;
2123       }
2124       olens[i] = olen;
2125       dlens[i] = dlen;
2126     }
2127     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2128     ierr = PetscFree(dlens);CHKERRQ(ierr);
2129   } else {
2130     int ml,nl;
2131 
2132     M = *newmat;
2133     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2134     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2135     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2136     /*
2137          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2138        rather than the slower MatSetValues().
2139     */
2140     M->was_assembled = PETSC_TRUE;
2141     M->assembled     = PETSC_FALSE;
2142   }
2143   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2144   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2145   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2146   ii  = aij->i;
2147   jj  = aij->j;
2148   aa  = aij->a;
2149   for (i=0; i<m; i++) {
2150     row   = rstart + i;
2151     nz    = ii[i+1] - ii[i];
2152     cwork = jj;     jj += nz;
2153     vwork = aa;     aa += nz;
2154     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2155   }
2156 
2157   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   *newmat = M;
2160 
2161   /* save submatrix used in processor for next request */
2162   if (call ==  MAT_INITIAL_MATRIX) {
2163     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2164     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2165   }
2166 
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 #undef __FUNCT__
2171 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2172 /*@C
2173    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2174    (the default parallel PETSc format).  For good matrix assembly performance
2175    the user should preallocate the matrix storage by setting the parameters
2176    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2177    performance can be increased by more than a factor of 50.
2178 
2179    Collective on MPI_Comm
2180 
2181    Input Parameters:
2182 +  A - the matrix
2183 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2184            (same value is used for all local rows)
2185 .  d_nnz - array containing the number of nonzeros in the various rows of the
2186            DIAGONAL portion of the local submatrix (possibly different for each row)
2187            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2188            The size of this array is equal to the number of local rows, i.e. 'm'.
2189            You must leave room for the diagonal entry even if it is zero.
2190 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2191            submatrix (same value is used for all local rows).
2192 -  o_nnz - array containing the number of nonzeros in the various rows of the
2193            OFF-DIAGONAL portion of the local submatrix (possibly different for
2194            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2195            structure. The size of this array is equal to the number
2196            of local rows, i.e. 'm'.
2197 
2198    The AIJ format (also called the Yale sparse matrix format or
2199    compressed row storage), is fully compatible with standard Fortran 77
2200    storage.  That is, the stored row and column indices can begin at
2201    either one (as in Fortran) or zero.  See the users manual for details.
2202 
2203    The user MUST specify either the local or global matrix dimensions
2204    (possibly both).
2205 
2206    The parallel matrix is partitioned such that the first m0 rows belong to
2207    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2208    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2209 
2210    The DIAGONAL portion of the local submatrix of a processor can be defined
2211    as the submatrix which is obtained by extracting the part corresponding
2212    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2213    first row that belongs to the processor, and r2 is the last row belonging
2214    to this processor. This is a square mxm matrix. The remaining portion
2215    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2216 
2217    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2218 
2219    By default, this format uses inodes (identical nodes) when possible.
2220    We search for consecutive rows with the same nonzero structure, thereby
2221    reusing matrix information to achieve increased efficiency.
2222 
2223    Options Database Keys:
2224 +  -mat_aij_no_inode  - Do not use inodes
2225 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2226 -  -mat_aij_oneindex - Internally use indexing starting at 1
2227         rather than 0.  Note that when calling MatSetValues(),
2228         the user still MUST index entries starting at 0!
2229 
2230    Example usage:
2231 
2232    Consider the following 8x8 matrix with 34 non-zero values, that is
2233    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2234    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2235    as follows:
2236 
2237 .vb
2238             1  2  0  |  0  3  0  |  0  4
2239     Proc0   0  5  6  |  7  0  0  |  8  0
2240             9  0 10  | 11  0  0  | 12  0
2241     -------------------------------------
2242            13  0 14  | 15 16 17  |  0  0
2243     Proc1   0 18  0  | 19 20 21  |  0  0
2244             0  0  0  | 22 23  0  | 24  0
2245     -------------------------------------
2246     Proc2  25 26 27  |  0  0 28  | 29  0
2247            30  0  0  | 31 32 33  |  0 34
2248 .ve
2249 
2250    This can be represented as a collection of submatrices as:
2251 
2252 .vb
2253       A B C
2254       D E F
2255       G H I
2256 .ve
2257 
2258    Where the submatrices A,B,C are owned by proc0, D,E,F are
2259    owned by proc1, G,H,I are owned by proc2.
2260 
2261    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2262    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2263    The 'M','N' parameters are 8,8, and have the same values on all procs.
2264 
2265    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2266    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2267    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2268    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2269    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2270    matrix, and [DF] as another SeqAIJ matrix.
2271 
2272    When d_nz, o_nz parameters are specified, d_nz storage elements are
2273    allocated for every row of the local diagonal submatrix, and o_nz
2274    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2275    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2276    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2277    In this case, the values of d_nz,o_nz are:
2278 .vb
2279      proc0 : dnz = 2, o_nz = 2
2280      proc1 : dnz = 3, o_nz = 2
2281      proc2 : dnz = 1, o_nz = 4
2282 .ve
2283    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2284    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2285    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2286    34 values.
2287 
2288    When d_nnz, o_nnz parameters are specified, the storage is specified
2289    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2290    In the above case the values for d_nnz,o_nnz are:
2291 .vb
2292      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2293      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2294      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2295 .ve
2296    Here the space allocated is the sum of all the above values, i.e. 34, and
2297    hence pre-allocation is perfect.
2298 
2299    Level: intermediate
2300 
2301 .keywords: matrix, aij, compressed row, sparse, parallel
2302 
2303 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2304 @*/
2305 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2306 {
2307   Mat_MPIAIJ   *b;
2308   int          ierr,i;
2309   PetscTruth   flg2;
2310 
2311   PetscFunctionBegin;
2312   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2313   if (!flg2) PetscFunctionReturn(0);
2314   B->preallocated = PETSC_TRUE;
2315   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2316   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2317   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2318   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2319   if (d_nnz) {
2320     for (i=0; i<B->m; i++) {
2321       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2322     }
2323   }
2324   if (o_nnz) {
2325     for (i=0; i<B->m; i++) {
2326       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2327     }
2328   }
2329   b = (Mat_MPIAIJ*)B->data;
2330 
2331   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2332   PetscLogObjectParent(B,b->A);
2333   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2334   PetscLogObjectParent(B,b->B);
2335 
2336   PetscFunctionReturn(0);
2337 }
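
/*
   Illustrative sketch (an editorial addition, not part of the library source): setting the type
   and the preallocation explicitly, using proc0's per-row counts from the 8x8 example in the
   manual page above.  Each process passes its own local sizes and arrays; the variable names and
   the communicator comm are hypothetical.

     Mat A;
     int ierr;
     int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};     counts for proc0 only

     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
        ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/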
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatCreateMPIAIJ"
2341 /*@C
2342    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2343    (the default parallel PETSc format).  For good matrix assembly performance
2344    the user should preallocate the matrix storage by setting the parameters
2345    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2346    performance can be increased by more than a factor of 50.
2347 
2348    Collective on MPI_Comm
2349 
2350    Input Parameters:
2351 +  comm - MPI communicator
2352 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2353            This value should be the same as the local size used in creating the
2354            y vector for the matrix-vector product y = Ax.
2355 .  n - This value should be the same as the local size used in creating the
2356        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2357        calculated if N is given) For square matrices n is almost always m.
2358 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2359 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2360 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2361            (same value is used for all local rows)
2362 .  d_nnz - array containing the number of nonzeros in the various rows of the
2363            DIAGONAL portion of the local submatrix (possibly different for each row)
2364            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2365            The size of this array is equal to the number of local rows, i.e. 'm'.
2366            You must leave room for the diagonal entry even if it is zero.
2367 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2368            submatrix (same value is used for all local rows).
2369 -  o_nnz - array containing the number of nonzeros in the various rows of the
2370            OFF-DIAGONAL portion of the local submatrix (possibly different for
2371            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2372            structure. The size of this array is equal to the number
2373            of local rows, i.e. 'm'.
2374 
2375    Output Parameter:
2376 .  A - the matrix
2377 
2378    Notes:
2379    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2380    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2381    storage requirements for this matrix.
2382 
2383    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2384    processor then it must be used on all processors that share the object for
2385    that argument.
2386 
2387    The AIJ format (also called the Yale sparse matrix format or
2388    compressed row storage), is fully compatible with standard Fortran 77
2389    storage.  That is, the stored row and column indices can begin at
2390    either one (as in Fortran) or zero.  See the users manual for details.
2391 
2392    The user MUST specify either the local or global matrix dimensions
2393    (possibly both).
2394 
2395    The parallel matrix is partitioned such that the first m0 rows belong to
2396    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2397    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2398 
2399    The DIAGONAL portion of the local submatrix of a processor can be defined
2400    as the submatrix which is obtained by extracting the part corresponding
2401    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2402    first row that belongs to the processor, and r2 is the last row belonging
2403    to this processor. This is a square mxm matrix. The remaining portion
2404    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2405 
2406    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2407 
2408    By default, this format uses inodes (identical nodes) when possible.
2409    We search for consecutive rows with the same nonzero structure, thereby
2410    reusing matrix information to achieve increased efficiency.
2411 
2412    Options Database Keys:
2413 +  -mat_aij_no_inode  - Do not use inodes
2414 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2415 -  -mat_aij_oneindex - Internally use indexing starting at 1
2416         rather than 0.  Note that when calling MatSetValues(),
2417         the user still MUST index entries starting at 0!
2418 
2419 
2420    Example usage:
2421 
2422    Consider the following 8x8 matrix with 34 non-zero values, that is
2423    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2424    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2425    as follows:
2426 
2427 .vb
2428             1  2  0  |  0  3  0  |  0  4
2429     Proc0   0  5  6  |  7  0  0  |  8  0
2430             9  0 10  | 11  0  0  | 12  0
2431     -------------------------------------
2432            13  0 14  | 15 16 17  |  0  0
2433     Proc1   0 18  0  | 19 20 21  |  0  0
2434             0  0  0  | 22 23  0  | 24  0
2435     -------------------------------------
2436     Proc2  25 26 27  |  0  0 28  | 29  0
2437            30  0  0  | 31 32 33  |  0 34
2438 .ve
2439 
2440    This can be represented as a collection of submatrices as:
2441 
2442 .vb
2443       A B C
2444       D E F
2445       G H I
2446 .ve
2447 
2448    Where the submatrices A,B,C are owned by proc0, D,E,F are
2449    owned by proc1, G,H,I are owned by proc2.
2450 
2451    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2452    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2453    The 'M','N' parameters are 8,8, and have the same values on all procs.
2454 
2455    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2456    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2457    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2458    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2459    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2460    matrix, and [DF] as another SeqAIJ matrix.
2461 
2462    When d_nz, o_nz parameters are specified, d_nz storage elements are
2463    allocated for every row of the local diagonal submatrix, and o_nz
2464    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2465    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2466    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2467    In this case, the values of d_nz,o_nz are:
2468 .vb
2469      proc0 : dnz = 2, o_nz = 2
2470      proc1 : dnz = 3, o_nz = 2
2471      proc2 : dnz = 1, o_nz = 4
2472 .ve
2473    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2474    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2475    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2476    34 values.
2477 
2478    When d_nnz, o_nnz parameters are specified, the storage is specified
2479    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2480    In the above case the values for d_nnz,o_nnz are:
2481 .vb
2482      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2483      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2484      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2485 .ve
2486    Here the space allocated is the sum of all the above values, i.e. 34, and
2487    hence pre-allocation is perfect.
2488 
2489    Level: intermediate
2490 
2491 .keywords: matrix, aij, compressed row, sparse, parallel
2492 
2493 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2494 @*/
2495 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2496 {
2497   int ierr,size;
2498 
2499   PetscFunctionBegin;
2500   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2501   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2502   if (size > 1) {
2503     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2504     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2505   } else {
2506     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2507     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2508   }
2509   PetscFunctionReturn(0);
2510 }
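
/*
   Illustrative sketch (an editorial addition, not part of the library source): the convenience
   routine above combines creation, type selection, and preallocation in one call.  Shown with the
   single d_nz/o_nz form; m, n, row, ncols, cols, vals, and comm are hypothetical names.

     Mat A;
     int ierr;

     ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/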
2511 
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2514 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2515 {
2516   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2517   PetscFunctionBegin;
2518   *Ad     = a->A;
2519   *Ao     = a->B;
2520   *colmap = a->garray;
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2526 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2527 {
2528   int        ierr,i;
2529   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2530 
2531   PetscFunctionBegin;
2532   if (coloring->ctype == IS_COLORING_LOCAL) {
2533     ISColoringValue *allcolors,*colors;
2534     ISColoring      ocoloring;
2535 
2536     /* set coloring for diagonal portion */
2537     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2538 
2539     /* set coloring for off-diagonal portion */
2540     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2541     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2542     for (i=0; i<a->B->n; i++) {
2543       colors[i] = allcolors[a->garray[i]];
2544     }
2545     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2546     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2547     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2548     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2549   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2550     ISColoringValue *colors;
2551     int             *larray;
2552     ISColoring      ocoloring;
2553 
2554     /* set coloring for diagonal portion */
2555     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2556     for (i=0; i<a->A->n; i++) {
2557       larray[i] = i + a->cstart;
2558     }
2559     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2560     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2561     for (i=0; i<a->A->n; i++) {
2562       colors[i] = coloring->colors[larray[i]];
2563     }
2564     ierr = PetscFree(larray);CHKERRQ(ierr);
2565     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2566     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2567     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2568 
2569     /* set coloring for off-diagonal portion */
2570     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2571     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2572     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2573     for (i=0; i<a->B->n; i++) {
2574       colors[i] = coloring->colors[larray[i]];
2575     }
2576     ierr = PetscFree(larray);CHKERRQ(ierr);
2577     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2578     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2579     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2580   } else {
2581     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2582   }
2583 
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2589 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2590 {
2591   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2592   int        ierr;
2593 
2594   PetscFunctionBegin;
2595   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2596   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 #undef __FUNCT__
2601 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2602 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2603 {
2604   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2605   int        ierr;
2606 
2607   PetscFunctionBegin;
2608   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2609   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatMerge"
2615 /*@C
2616       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2617                  matrices from each processor
2618 
2619     Collective on MPI_Comm
2620 
2621    Input Parameters:
2622 +    comm - the communicator the parallel matrix will live on
2623 -    inmat - the input sequential matrix on each processor
2624 
2625    Output Parameter:
2626 .    outmat - the parallel matrix generated
2627 
2628     Level: advanced
2629 
2630    Notes: The number of columns in EACH of the separate input matrices MUST be
2631       the same. Note that inmat is destroyed by this routine.
2632 
2633 @*/
2634 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2635 {
2636   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2637   PetscScalar *values;
2638   PetscMap    columnmap,rowmap;
2639 
2640   PetscFunctionBegin;
2641 
2642   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2643 
2644   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2645   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2646   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2647   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2648   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2649   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2650 
2651   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2652   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2653   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2654   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2655   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2656 
2657   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2658   for (i=0;i<m;i++) {
2659     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2660     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2661     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2662   }
2663   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2664   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2665 
2666   for (i=0;i<m;i++) {
2667     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2668     I    = i + rstart;
2669     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2670     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2671   }
2672   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2673   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2674   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2675 
2676   PetscFunctionReturn(0);
2677 }
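
/*
   Illustrative sketch (an editorial addition, not part of the library source): each process
   builds its block of rows as a SeqAIJ matrix and MatMerge() stacks them into one parallel
   matrix.  Note that, as coded above, inmat is destroyed inside MatMerge().  mlocal, N, and
   maxnz are hypothetical names.

     Mat inmat,outmat;
     int ierr;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,maxnz,PETSC_NULL,&inmat);CHKERRQ(ierr);
        ... set rows 0..mlocal-1 of inmat using the global column numbering ...
     ierr = MatAssemblyBegin(inmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(inmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatMerge(PETSC_COMM_WORLD,inmat,&outmat);CHKERRQ(ierr);
*/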
2678 
2679 #undef __FUNCT__
2680 #define __FUNCT__ "MatFileSplit"
2681 int MatFileSplit(Mat A,char *outfile)
2682 {
2683   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2684   PetscViewer out;
2685   char        *name;
2686   Mat         B;
2687   PetscScalar *values;
2688 
2689   PetscFunctionBegin;
2690 
2691   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2692   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2693   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2694   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2695   for (i=0;i<m;i++) {
2696     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2697     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2698     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2699   }
2700   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2701   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2702 
2703   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2704   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2705   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2706   sprintf(name,"%s.%d",outfile,rank);
2707   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2708   ierr = PetscFree(name);CHKERRQ(ierr);
2709   ierr = MatView(B,out);CHKERRQ(ierr);
2710   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2711   ierr = MatDestroy(B);CHKERRQ(ierr);
2712   PetscFunctionReturn(0);
2713 }
2714