xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c537a17644e75ddee54e85f4b8155d17b42d28e5)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order N integer array) but is fast to access.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
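/*
   Illustrative sketch, not part of the library source: once CreateColmap_MPIAIJ_Private()
   has been called, a global column number gcol is translated to a local column of the
   off-diagonal block B with the lookup below; a result of -1 means gcol does not occur
   in B on this process.  This mirrors the lookup pattern used in MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ() further down in this file.

     int lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                            zero is reserved for "not found", hence the shifts
   #else
     lcol = aij->colmap[gcol] - 1;      dense array of length mat->N; entry 0 means "not found"
   #endif
     if (lcol < 0) { the column is new to the off-diagonal part on this process }
*/
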
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
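/*
   Illustrative usage sketch, not part of the library source: how a caller reaches
   MatSetValues_MPIAIJ() through the public interface.  Each process may set values
   for any global row; entries for rows owned by other processes are stashed and
   communicated during assembly (see MatAssemblyBegin/End_MPIAIJ() below).  The
   variables comm, m, n, M, N and the preallocation counts are assumed to be supplied
   by the caller.

     Mat         A;
     int         ierr,row = 0,col = 0;
     PetscScalar val = 1.0;

     ierr = MatCreateMPIAIJ(comm,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
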
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
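/*
   Illustrative usage sketch, not part of the library source: MatGetValues() on an
   MPIAIJ matrix may only request rows owned by the calling process, as enforced by
   the "Only local values currently supported" error above; off-diagonal columns that
   are not stored locally return 0.0.  row is assumed to satisfy rstart <= row < rend.

     PetscScalar val;
     int         ierr,row,col = 0;
     ierr = MatGetValues(A,1,&row,1,&col,&val);CHKERRQ(ierr);
*/
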
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353   int         i,j,rstart,ncols,n,ierr,flg;
354   int         *row,*col,other_disassembled;
355   PetscScalar *val;
356   InsertMode  addv = mat->insertmode;
357 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
358   PetscTruth  flag;
359 #endif
360 
361   PetscFunctionBegin;
362   if (!aij->donotstash) {
363     while (1) {
364       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
365       if (!flg) break;
366 
367       for (i=0; i<n;) {
368         /* Now identify the consecutive vals belonging to the same row */
369         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
370         if (j < n) ncols = j-i;
371         else       ncols = n-i;
372         /* Now assemble all these values with a single function call */
373         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
374         i = j;
375       }
376     }
377     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
378   }
379 
380   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
381   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
382 
383   /* determine if any processor has disassembled, if so we must
384      also disassemble ourselves, in order that we may reassemble. */
385   /*
386      if nonzero structure of submatrix B cannot change then we know that
387      no processor disassembled thus we can skip this stuff
388   */
389   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
390     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
391     if (mat->was_assembled && !other_disassembled) {
392       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
393     }
394   }
395 
396   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
397     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
398   }
399   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
400   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
401 
402   if (aij->rowvalues) {
403     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
404     aij->rowvalues = 0;
405   }
406 #if defined(PETSC_HAVE_SUPERLUDIST)
407   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
408   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
409 #endif
410 
411 #if defined(PETSC_HAVE_SPOOLES)
412   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
413   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
414 #endif
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
420 int MatZeroEntries_MPIAIJ(Mat A)
421 {
422   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
423   int        ierr;
424 
425   PetscFunctionBegin;
426   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
427   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatZeroRows_MPIAIJ"
433 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
434 {
435   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
436   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
437   int            *nprocs,j,idx,nsends,row;
438   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
439   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
440   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
441   MPI_Comm       comm = A->comm;
442   MPI_Request    *send_waits,*recv_waits;
443   MPI_Status     recv_status,*send_status;
444   IS             istmp;
445   PetscTruth     found;
446 
447   PetscFunctionBegin;
448   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
449   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
450 
451   /*  first count number of contributors to each processor */
452   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
453   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
454   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
455   for (i=0; i<N; i++) {
456     idx = rows[i];
457     found = PETSC_FALSE;
458     for (j=0; j<size; j++) {
459       if (idx >= owners[j] && idx < owners[j+1]) {
460         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
461       }
462     }
463     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
464   }
465   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
466 
467   /* inform other processors of number of messages and max length*/
468   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
469 
470   /* post receives:   */
471   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
472   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
473   for (i=0; i<nrecvs; i++) {
474     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
475   }
476 
477   /* do sends:
478       1) starts[i] gives the starting index in svalues for stuff going to
479          the ith processor
480   */
481   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
482   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
483   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
484   starts[0] = 0;
485   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
486   for (i=0; i<N; i++) {
487     svalues[starts[owner[i]]++] = rows[i];
488   }
489   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
490 
491   starts[0] = 0;
492   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
493   count = 0;
494   for (i=0; i<size; i++) {
495     if (nprocs[2*i+1]) {
496       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
497     }
498   }
499   ierr = PetscFree(starts);CHKERRQ(ierr);
500 
501   base = owners[rank];
502 
503   /*  wait on receives */
504   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
505   source = lens + nrecvs;
506   count  = nrecvs; slen = 0;
507   while (count) {
508     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
509     /* unpack receives into our local space */
510     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
511     source[imdex]  = recv_status.MPI_SOURCE;
512     lens[imdex]    = n;
513     slen          += n;
514     count--;
515   }
516   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
517 
518   /* move the data into the send scatter */
519   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
520   count = 0;
521   for (i=0; i<nrecvs; i++) {
522     values = rvalues + i*nmax;
523     for (j=0; j<lens[i]; j++) {
524       lrows[count++] = values[j] - base;
525     }
526   }
527   ierr = PetscFree(rvalues);CHKERRQ(ierr);
528   ierr = PetscFree(lens);CHKERRQ(ierr);
529   ierr = PetscFree(owner);CHKERRQ(ierr);
530   ierr = PetscFree(nprocs);CHKERRQ(ierr);
531 
532   /* actually zap the local rows */
533   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
534   PetscLogObjectParent(A,istmp);
535 
536   /*
537         Zero the required rows. If the "diagonal block" of the matrix
538      is square and the user wishes to set the diagonal we use separate
539      code so that MatSetValues() is not called once per diagonal entry, which would
540      allocate new memory each time, causing lots of mallocs and slowing things down.
541 
542        Contributed by: Mathew Knepley
543   */
544   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
545   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
546   if (diag && (l->A->M == l->A->N)) {
547     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
548   } else if (diag) {
549     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
550     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
551       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
552 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
553     }
554     for (i = 0; i < slen; i++) {
555       row  = lrows[i] + rstart;
556       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
557     }
558     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560   } else {
561     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
562   }
563   ierr = ISDestroy(istmp);CHKERRQ(ierr);
564   ierr = PetscFree(lrows);CHKERRQ(ierr);
565 
566   /* wait on sends */
567   if (nsends) {
568     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
569     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
570     ierr = PetscFree(send_status);CHKERRQ(ierr);
571   }
572   ierr = PetscFree(send_waits);CHKERRQ(ierr);
573   ierr = PetscFree(svalues);CHKERRQ(ierr);
574 
575   PetscFunctionReturn(0);
576 }
577 
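/*
   Illustrative usage sketch, not part of the library source: zeroing a set of global
   rows and placing a value on their diagonals.  Each process may list any global rows;
   the routine above ships every row index to the process that owns it.  The row
   numbers used here are assumed to be valid for the matrix A.

     IS          rows;
     int         ierr,r[2] = {0,5};
     PetscScalar diag = 1.0;

     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,r,&rows);CHKERRQ(ierr);
     ierr = MatZeroRows(A,rows,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(rows);CHKERRQ(ierr);
*/
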
578 #undef __FUNCT__
579 #define __FUNCT__ "MatMult_MPIAIJ"
580 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
581 {
582   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
583   int        ierr,nt;
584 
585   PetscFunctionBegin;
586   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
587   if (nt != A->n) {
588     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
589   }
590   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
592   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
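/*
   Illustrative note, not part of the library source: the multiply above overlaps
   communication with computation; the scatter of the ghost entries of xx into a->lvec
   is started, the diagonal block a->A is applied while the messages are in flight, and
   only then is the scatter completed and the off-diagonal block a->B applied.  A caller
   simply writes, with x conforming to the column layout and y to the row layout of A,

     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/
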
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMultAdd_MPIAIJ"
599 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
602   int        ierr;
603 
604   PetscFunctionBegin;
605   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
607   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
608   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
614 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615 {
616   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
617   int        ierr;
618 
619   PetscFunctionBegin;
620   /* do nondiagonal part */
621   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
622   /* send it on its way */
623   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   /* do local part */
625   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
626   /* receive remote parts: note this assumes the values are not actually */
627   /* inserted in yy until the next line, which is true for my implementation */
628   /* but may not always be true. */
629   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
635 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
636 {
637   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
638   int        ierr;
639 
640   PetscFunctionBegin;
641   /* do nondiagonal part */
642   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
643   /* send it on its way */
644   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
645   /* do local part */
646   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
647   /* receive remote parts: note this assumes the values are not actually */
648   /* inserted in zz until the next line, which is true for my implementation */
649   /* but may not always be true. */
650   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 /*
655   This only works correctly for square matrices where the subblock A->A is the
656    diagonal block
657 */
658 #undef __FUNCT__
659 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
660 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
661 {
662   int        ierr;
663   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
664 
665   PetscFunctionBegin;
666   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
667   if (a->rstart != a->cstart || a->rend != a->cend) {
668     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
669   }
670   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatScale_MPIAIJ"
676 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
677 {
678   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
679   int        ierr;
680 
681   PetscFunctionBegin;
682   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
683   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
684   PetscFunctionReturn(0);
685 }
686 
687 #undef __FUNCT__
688 #define __FUNCT__ "MatDestroy_MPIAIJ"
689 int MatDestroy_MPIAIJ(Mat mat)
690 {
691   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
692   int        ierr;
693 
694   PetscFunctionBegin;
695 #if defined(PETSC_USE_LOG)
696   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
697 #endif
698   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
699   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
700   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
701   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
702 #if defined (PETSC_USE_CTABLE)
703   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
704 #else
705   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
706 #endif
707   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
708   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
709   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
710   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
711   ierr = PetscFree(aij);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
716 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatView_MPIAIJ_Binary"
720 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
721 {
722   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
723   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
724   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
725   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
726   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
727   PetscScalar       *column_values;
728 
729   PetscFunctionBegin;
730   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
731   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
732   nz   = A->nz + B->nz;
733   if (rank == 0) {
734     header[0] = MAT_FILE_COOKIE;
735     header[1] = mat->M;
736     header[2] = mat->N;
737     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
738     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
739     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
740     /* get largest number of rows any processor has */
741     rlen = mat->m;
742     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
743     for (i=1; i<size; i++) {
744       rlen = PetscMax(rlen,range[i+1] - range[i]);
745     }
746   } else {
747     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
748     rlen = mat->m;
749   }
750 
751   /* load up the local row counts */
752   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
753   for (i=0; i<mat->m; i++) {
754     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
755   }
756 
757   /* store the row lengths to the file */
758   if (rank == 0) {
759     MPI_Status status;
760     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
761     for (i=1; i<size; i++) {
762       rlen = range[i+1] - range[i];
763       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
764       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
765     }
766   } else {
767     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
768   }
769   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
770 
771   /* load up the local column indices */
772   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
773   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
774   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
775   cnt  = 0;
776   for (i=0; i<mat->m; i++) {
777     for (j=B->i[i]; j<B->i[i+1]; j++) {
778       if ( (col = garray[B->j[j]]) > cstart) break;
779       column_indices[cnt++] = col;
780     }
781     for (k=A->i[i]; k<A->i[i+1]; k++) {
782       column_indices[cnt++] = A->j[k] + cstart;
783     }
784     for (; j<B->i[i+1]; j++) {
785       column_indices[cnt++] = garray[B->j[j]];
786     }
787   }
788   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
789 
790   /* store the column indices to the file */
791   if (rank == 0) {
792     MPI_Status status;
793     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
794     for (i=1; i<size; i++) {
795       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
796       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
797       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
798       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
799     }
800   } else {
801     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
802     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
803   }
804   ierr = PetscFree(column_indices);CHKERRQ(ierr);
805 
806   /* load up the local column values */
807   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
808   cnt  = 0;
809   for (i=0; i<mat->m; i++) {
810     for (j=B->i[i]; j<B->i[i+1]; j++) {
811       if ( garray[B->j[j]] > cstart) break;
812       column_values[cnt++] = B->a[j];
813     }
814     for (k=A->i[i]; k<A->i[i+1]; k++) {
815       column_values[cnt++] = A->a[k];
816     }
817     for (; j<B->i[i+1]; j++) {
818       column_values[cnt++] = B->a[j];
819     }
820   }
821   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
822 
823   /* store the column values to the file */
824   if (rank == 0) {
825     MPI_Status status;
826     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
827     for (i=1; i<size; i++) {
828       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
829       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
830       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
831       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
832     }
833   } else {
834     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
835     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
836   }
837   ierr = PetscFree(column_values);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
843 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
844 {
845   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
846   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
847   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
848   PetscTruth        isdraw,isascii,flg,isbinary;
849   PetscViewer       sviewer;
850   PetscViewerFormat format;
851 
852   PetscFunctionBegin;
853   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
854   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
855   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
856   if (isascii) {
857     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
858     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
859       MatInfo info;
860       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
861       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
862       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
863       if (flg) {
864         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
865 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
866       } else {
867         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
868 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
869       }
870       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
871       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
872       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
873       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
874       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
875       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
876       PetscFunctionReturn(0);
877     } else if (format == PETSC_VIEWER_ASCII_INFO) {
878       PetscFunctionReturn(0);
879     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
880 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
881       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
882 #endif
883 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
884       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
885 #endif
886 
887       PetscFunctionReturn(0);
888     }
889   } else if (isbinary) {
890     if (size == 1) {
891       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
892       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
893     } else {
894       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
895     }
896     PetscFunctionReturn(0);
897   } else if (isdraw) {
898     PetscDraw  draw;
899     PetscTruth isnull;
900     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
901     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
902   }
903 
904   if (size == 1) {
905     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
906     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
907   } else {
908     /* assemble the entire matrix onto first processor. */
909     Mat         A;
910     Mat_SeqAIJ *Aloc;
911     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
912     PetscScalar *a;
913 
914     if (!rank) {
915       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
916     } else {
917       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
918     }
919     PetscLogObjectParent(mat,A);
920 
921     /* copy over the A part */
922     Aloc = (Mat_SeqAIJ*)aij->A->data;
923     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
924     row = aij->rstart;
925     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
926     for (i=0; i<m; i++) {
927       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
928       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
929     }
930     aj = Aloc->j;
931     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
932 
933     /* copy over the B part */
934     Aloc = (Mat_SeqAIJ*)aij->B->data;
935     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
936     row  = aij->rstart;
937     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
938     ct   = cols;
939     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
940     for (i=0; i<m; i++) {
941       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
942       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
943     }
944     ierr = PetscFree(ct);CHKERRQ(ierr);
945     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
946     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
947     /*
948        Everyone has to call to draw the matrix since the graphics waits are
949        synchronized across all processors that share the PetscDraw object
950     */
951     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
952     if (!rank) {
953       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
954       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
955     }
956     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
957     ierr = MatDestroy(A);CHKERRQ(ierr);
958   }
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatView_MPIAIJ"
964 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
965 {
966   int        ierr;
967   PetscTruth isascii,isdraw,issocket,isbinary;
968 
969   PetscFunctionBegin;
970   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
971   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
972   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
974   if (isascii || isdraw || isbinary || issocket) {
975     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
976   } else {
977     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
978   }
979   PetscFunctionReturn(0);
980 }
981 
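/*
   Illustrative usage sketch, not part of the library source: viewing an MPIAIJ matrix.
   ASCII, draw, socket and binary viewers are dispatched above; any other viewer type is
   rejected.  The convenience viewers named below are assumed to be available in this release.

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
*/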
982 
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatRelax_MPIAIJ"
986 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
987 {
988   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
989   int          ierr;
990   Vec          bb1;
991   PetscScalar  mone=-1.0;
992 
993   PetscFunctionBegin;
994   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
995 
996   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
997 
998   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
999     if (flag & SOR_ZERO_INITIAL_GUESS) {
1000       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1001       its--;
1002     }
1003 
1004     while (its--) {
1005       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1006       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1007 
1008       /* update rhs: bb1 = bb - B*x */
1009       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1010       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1011 
1012       /* local sweep */
1013       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1014       CHKERRQ(ierr);
1015     }
1016   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1017     if (flag & SOR_ZERO_INITIAL_GUESS) {
1018       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1019       its--;
1020     }
1021     while (its--) {
1022       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1023       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1024 
1025       /* update rhs: bb1 = bb - B*x */
1026       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1027       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1028 
1029       /* local sweep */
1030       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1031       CHKERRQ(ierr);
1032     }
1033   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1034     if (flag & SOR_ZERO_INITIAL_GUESS) {
1035       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1036       its--;
1037     }
1038     while (its--) {
1039       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1040       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1041 
1042       /* update rhs: bb1 = bb - B*x */
1043       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1044       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1045 
1046       /* local sweep */
1047       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1048       CHKERRQ(ierr);
1049     }
1050   } else {
1051     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1052   }
1053 
1054   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1060 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1061 {
1062   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1063   Mat        A = mat->A,B = mat->B;
1064   int        ierr;
1065   PetscReal  isend[5],irecv[5];
1066 
1067   PetscFunctionBegin;
1068   info->block_size     = 1.0;
1069   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1070   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1071   isend[3] = info->memory;  isend[4] = info->mallocs;
1072   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1073   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1074   isend[3] += info->memory;  isend[4] += info->mallocs;
1075   if (flag == MAT_LOCAL) {
1076     info->nz_used      = isend[0];
1077     info->nz_allocated = isend[1];
1078     info->nz_unneeded  = isend[2];
1079     info->memory       = isend[3];
1080     info->mallocs      = isend[4];
1081   } else if (flag == MAT_GLOBAL_MAX) {
1082     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1083     info->nz_used      = irecv[0];
1084     info->nz_allocated = irecv[1];
1085     info->nz_unneeded  = irecv[2];
1086     info->memory       = irecv[3];
1087     info->mallocs      = irecv[4];
1088   } else if (flag == MAT_GLOBAL_SUM) {
1089     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1090     info->nz_used      = irecv[0];
1091     info->nz_allocated = irecv[1];
1092     info->nz_unneeded  = irecv[2];
1093     info->memory       = irecv[3];
1094     info->mallocs      = irecv[4];
1095   }
1096   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1097   info->fill_ratio_needed = 0;
1098   info->factor_mallocs    = 0;
1099   info->rows_global       = (double)matin->M;
1100   info->columns_global    = (double)matin->N;
1101   info->rows_local        = (double)matin->m;
1102   info->columns_local     = (double)matin->N;
1103 
1104   PetscFunctionReturn(0);
1105 }
1106 
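/*
   Illustrative usage sketch, not part of the library source: querying statistics about
   an assembled matrix through the routine above.  MAT_LOCAL returns the counts of the
   calling process; MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce them over the communicator.

     MatInfo info;
     int     ierr;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %d, mallocs %d\n",
                        (int)info.nz_used,(int)info.mallocs);CHKERRQ(ierr);
*/
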
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatSetOption_MPIAIJ"
1109 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1110 {
1111   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1112   int        ierr;
1113 
1114   PetscFunctionBegin;
1115   switch (op) {
1116   case MAT_NO_NEW_NONZERO_LOCATIONS:
1117   case MAT_YES_NEW_NONZERO_LOCATIONS:
1118   case MAT_COLUMNS_UNSORTED:
1119   case MAT_COLUMNS_SORTED:
1120   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1121   case MAT_KEEP_ZEROED_ROWS:
1122   case MAT_NEW_NONZERO_LOCATION_ERR:
1123   case MAT_USE_INODES:
1124   case MAT_DO_NOT_USE_INODES:
1125   case MAT_IGNORE_ZERO_ENTRIES:
1126     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1127     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1128     break;
1129   case MAT_ROW_ORIENTED:
1130     a->roworiented = PETSC_TRUE;
1131     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1132     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1133     break;
1134   case MAT_ROWS_SORTED:
1135   case MAT_ROWS_UNSORTED:
1136   case MAT_YES_NEW_DIAGONALS:
1137     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1138     break;
1139   case MAT_COLUMN_ORIENTED:
1140     a->roworiented = PETSC_FALSE;
1141     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1142     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1143     break;
1144   case MAT_IGNORE_OFF_PROC_ENTRIES:
1145     a->donotstash = PETSC_TRUE;
1146     break;
1147   case MAT_NO_NEW_DIAGONALS:
1148     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1149   default:
1150     SETERRQ(PETSC_ERR_SUP,"unknown option");
1151   }
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatGetRow_MPIAIJ"
1157 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1158 {
1159   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1160   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1161   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1162   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1163   int          *cmap,*idx_p;
1164 
1165   PetscFunctionBegin;
1166   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1167   mat->getrowactive = PETSC_TRUE;
1168 
1169   if (!mat->rowvalues && (idx || v)) {
1170     /*
1171         allocate enough space to hold information from the longest row.
1172     */
1173     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1174     int     max = 1,tmp;
1175     for (i=0; i<matin->m; i++) {
1176       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1177       if (max < tmp) { max = tmp; }
1178     }
1179     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1180     mat->rowindices = (int*)(mat->rowvalues + max);
1181   }
1182 
1183   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1184   lrow = row - rstart;
1185 
1186   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1187   if (!v)   {pvA = 0; pvB = 0;}
1188   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1189   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1190   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1191   nztot = nzA + nzB;
1192 
1193   cmap  = mat->garray;
1194   if (v  || idx) {
1195     if (nztot) {
1196       /* Sort by increasing column numbers, assuming A and B already sorted */
1197       int imark = -1;
1198       if (v) {
1199         *v = v_p = mat->rowvalues;
1200         for (i=0; i<nzB; i++) {
1201           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1202           else break;
1203         }
1204         imark = i;
1205         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1206         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1207       }
1208       if (idx) {
1209         *idx = idx_p = mat->rowindices;
1210         if (imark > -1) {
1211           for (i=0; i<imark; i++) {
1212             idx_p[i] = cmap[cworkB[i]];
1213           }
1214         } else {
1215           for (i=0; i<nzB; i++) {
1216             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1217             else break;
1218           }
1219           imark = i;
1220         }
1221         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1222         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1223       }
1224     } else {
1225       if (idx) *idx = 0;
1226       if (v)   *v   = 0;
1227     }
1228   }
1229   *nz = nztot;
1230   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1231   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1237 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1238 {
1239   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1240 
1241   PetscFunctionBegin;
1242   if (aij->getrowactive == PETSC_FALSE) {
1243     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1244   }
1245   aij->getrowactive = PETSC_FALSE;
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatNorm_MPIAIJ"
1251 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1252 {
1253   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1254   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1255   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1256   PetscReal    sum = 0.0;
1257   PetscScalar  *v;
1258 
1259   PetscFunctionBegin;
1260   if (aij->size == 1) {
1261     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1262   } else {
1263     if (type == NORM_FROBENIUS) {
1264       v = amat->a;
1265       for (i=0; i<amat->nz; i++) {
1266 #if defined(PETSC_USE_COMPLEX)
1267         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1268 #else
1269         sum += (*v)*(*v); v++;
1270 #endif
1271       }
1272       v = bmat->a;
1273       for (i=0; i<bmat->nz; i++) {
1274 #if defined(PETSC_USE_COMPLEX)
1275         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1276 #else
1277         sum += (*v)*(*v); v++;
1278 #endif
1279       }
1280       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1281       *norm = sqrt(*norm);
1282     } else if (type == NORM_1) { /* max column norm */
1283       PetscReal *tmp,*tmp2;
1284       int    *jj,*garray = aij->garray;
1285       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1286       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1287       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1288       *norm = 0.0;
1289       v = amat->a; jj = amat->j;
1290       for (j=0; j<amat->nz; j++) {
1291         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1292       }
1293       v = bmat->a; jj = bmat->j;
1294       for (j=0; j<bmat->nz; j++) {
1295         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1296       }
1297       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1298       for (j=0; j<mat->N; j++) {
1299         if (tmp2[j] > *norm) *norm = tmp2[j];
1300       }
1301       ierr = PetscFree(tmp);CHKERRQ(ierr);
1302       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1303     } else if (type == NORM_INFINITY) { /* max row norm */
1304       PetscReal ntemp = 0.0;
1305       for (j=0; j<aij->A->m; j++) {
1306         v = amat->a + amat->i[j] + shift;
1307         sum = 0.0;
1308         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1309           sum += PetscAbsScalar(*v); v++;
1310         }
1311         v = bmat->a + bmat->i[j] + shift;
1312         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1313           sum += PetscAbsScalar(*v); v++;
1314         }
1315         if (sum > ntemp) ntemp = sum;
1316       }
1317       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1318     } else {
1319       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1320     }
1321   }
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatTranspose_MPIAIJ"
1327 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1328 {
1329   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1330   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1331   int          ierr,shift = Aloc->indexshift;
1332   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1333   Mat          B;
1334   PetscScalar  *array;
1335 
1336   PetscFunctionBegin;
1337   if (!matout && M != N) {
1338     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1339   }
1340 
1341   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1342 
1343   /* copy over the A part */
1344   Aloc = (Mat_SeqAIJ*)a->A->data;
1345   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1346   row = a->rstart;
1347   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1348   for (i=0; i<m; i++) {
1349     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1350     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1351   }
1352   aj = Aloc->j;
1353   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1354 
1355   /* copy over the B part */
1356   Aloc = (Mat_SeqAIJ*)a->B->data;
1357   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1358   row  = a->rstart;
1359   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1360   ct   = cols;
1361   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1362   for (i=0; i<m; i++) {
1363     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1364     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1365   }
1366   ierr = PetscFree(ct);CHKERRQ(ierr);
1367   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1368   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1369   if (matout) {
1370     *matout = B;
1371   } else {
1372     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1379 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1380 {
1381   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1382   Mat        a = aij->A,b = aij->B;
1383   int        ierr,s1,s2,s3;
1384 
1385   PetscFunctionBegin;
1386   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1387   if (rr) {
1388     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1389     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1390     /* Overlap communication with computation. */
1391     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1392   }
1393   if (ll) {
1394     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1395     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1396     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1397   }
1398   /* scale  the diagonal block */
1399   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1400 
1401   if (rr) {
1402     /* Do a scatter end and then right scale the off-diagonal block */
1403     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1404     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1405   }
1406 
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1413 int MatPrintHelp_MPIAIJ(Mat A)
1414 {
1415   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1416   int        ierr;
1417 
1418   PetscFunctionBegin;
1419   if (!a->rank) {
1420     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1427 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1428 {
1429   PetscFunctionBegin;
1430   *bs = 1;
1431   PetscFunctionReturn(0);
1432 }
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1435 int MatSetUnfactored_MPIAIJ(Mat A)
1436 {
1437   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1438   int        ierr;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatEqual_MPIAIJ"
1447 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1448 {
1449   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1450   Mat        a,b,c,d;
1451   PetscTruth flg;
1452   int        ierr;
1453 
1454   PetscFunctionBegin;
1455   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1456   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1457   a = matA->A; b = matA->B;
1458   c = matB->A; d = matB->B;
1459 
1460   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1461   if (flg == PETSC_TRUE) {
1462     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1463   }
1464   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatCopy_MPIAIJ"
1470 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1471 {
1472   int        ierr;
1473   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1474   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1475   PetscTruth flg;
1476 
1477   PetscFunctionBegin;
1478   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1479   if (str != SAME_NONZERO_PATTERN || !flg) {
1480     /* because of the column compression in the off-processor part of the matrix a->B,
1481        the number of columns in a->B and b->B may differ, hence we cannot call
1482        MatCopy() directly on the two parts. If need be, a copy more efficient than
1483        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1484        then copying the submatrices */
1485     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1486   } else {
1487     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1488     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1489   }
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1495 int MatSetUpPreallocation_MPIAIJ(Mat A)
1496 {
1497   int        ierr;
1498 
1499   PetscFunctionBegin;
1500   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1505 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1506 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1507 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1508 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1509 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1510 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1511 #endif
1512 
1513 #include "petscblaslapack.h"
1514 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatAXPY_MPIAIJ"
1517 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1518 {
1519   int        ierr,one=1;
1520   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1521   Mat_SeqAIJ *x,*y;
1522 
1523   PetscFunctionBegin;
1524   if (str == SAME_NONZERO_PATTERN) {
1525     x  = (Mat_SeqAIJ *)xx->A->data;
1526     y  = (Mat_SeqAIJ *)yy->A->data;
1527     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1528     x  = (Mat_SeqAIJ *)xx->B->data;
1529     y  = (Mat_SeqAIJ *)yy->B->data;
1530     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1531   } else if (str == SUBSET_NONZERO_PATTERN) {  /* the B column indices are compressed and must be mapped back to global indices via garray */
1532     int rank,nx,ny;
1533     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1534     /* ierr = MatAXPY_SeqAIJ(a,xx->B,yy->B,str);CHKERRQ(ierr); */
1535     /* debugging output for the off-diagonal part, which is not yet handled */
1536     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
1537     if (rank == 0) {
1538       int j,row=0,*garray=xx->garray;
1539       x  = (Mat_SeqAIJ *)xx->B->data;
1540       MatGetSize(xx->B,PETSC_NULL,&nx);
1541       MatGetSize(yy->B,PETSC_NULL,&ny);
1542       printf("nx: %d, ny: %d\n",nx,ny);
1543       for (j=0; j<nx; j++){
1544         printf("%d, ",garray[j]);
1545       }
1546       printf("\n----------- \n");
1547       for (j=x->i[row]; j<x->i[row+1]; j++){
1548         printf("%d, ",x->j[j]);
1549       }
1550       printf("\n----------- \n");
1551       /* printf("[%d], xx->A: \n", rank);
1552          ierr = MatView(xx->A,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); */
1553       printf("[%d], xx->B: \n", rank);
1554       ierr = MatView(xx->B,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
1555       printf(" -----------\n");
1556     }
1557 
1558     ierr = MatView(X,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
1559   } else {
1560     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 /* -------------------------------------------------------------------*/
1566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1567        MatGetRow_MPIAIJ,
1568        MatRestoreRow_MPIAIJ,
1569        MatMult_MPIAIJ,
1570        MatMultAdd_MPIAIJ,
1571        MatMultTranspose_MPIAIJ,
1572        MatMultTransposeAdd_MPIAIJ,
1573        0,
1574        0,
1575        0,
1576        0,
1577        0,
1578        0,
1579        MatRelax_MPIAIJ,
1580        MatTranspose_MPIAIJ,
1581        MatGetInfo_MPIAIJ,
1582        MatEqual_MPIAIJ,
1583        MatGetDiagonal_MPIAIJ,
1584        MatDiagonalScale_MPIAIJ,
1585        MatNorm_MPIAIJ,
1586        MatAssemblyBegin_MPIAIJ,
1587        MatAssemblyEnd_MPIAIJ,
1588        0,
1589        MatSetOption_MPIAIJ,
1590        MatZeroEntries_MPIAIJ,
1591        MatZeroRows_MPIAIJ,
1592 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1593        MatLUFactorSymbolic_MPIAIJ_TFS,
1594 #else
1595        0,
1596 #endif
1597        0,
1598        0,
1599        0,
1600        MatSetUpPreallocation_MPIAIJ,
1601        0,
1602        0,
1603        0,
1604        0,
1605        MatDuplicate_MPIAIJ,
1606        0,
1607        0,
1608        0,
1609        0,
1610        MatAXPY_MPIAIJ,
1611        MatGetSubMatrices_MPIAIJ,
1612        MatIncreaseOverlap_MPIAIJ,
1613        MatGetValues_MPIAIJ,
1614        MatCopy_MPIAIJ,
1615        MatPrintHelp_MPIAIJ,
1616        MatScale_MPIAIJ,
1617        0,
1618        0,
1619        0,
1620        MatGetBlockSize_MPIAIJ,
1621        0,
1622        0,
1623        0,
1624        0,
1625        MatFDColoringCreate_MPIAIJ,
1626        0,
1627        MatSetUnfactored_MPIAIJ,
1628        0,
1629        0,
1630        MatGetSubMatrix_MPIAIJ,
1631        MatDestroy_MPIAIJ,
1632        MatView_MPIAIJ,
1633        MatGetPetscMaps_Petsc,
1634        0,
1635        0,
1636        0,
1637        0,
1638        0,
1639        0,
1640        0,
1641        0,
1642        MatSetColoring_MPIAIJ,
1643        MatSetValuesAdic_MPIAIJ,
1644        MatSetValuesAdifor_MPIAIJ
1645 };
1646 
1647 /* ----------------------------------------------------------------------------------------*/
1648 
1649 EXTERN_C_BEGIN
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1652 int MatStoreValues_MPIAIJ(Mat mat)
1653 {
1654   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1655   int        ierr;
1656 
1657   PetscFunctionBegin;
1658   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1659   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1660   PetscFunctionReturn(0);
1661 }
1662 EXTERN_C_END
1663 
1664 EXTERN_C_BEGIN
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1667 int MatRetrieveValues_MPIAIJ(Mat mat)
1668 {
1669   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1670   int        ierr;
1671 
1672   PetscFunctionBegin;
1673   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1674   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1675   PetscFunctionReturn(0);
1676 }
1677 EXTERN_C_END
1678 
1679 #include "petscpc.h"
1680 EXTERN_C_BEGIN
1681 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1682 EXTERN_C_END
1683 
1684 EXTERN_C_BEGIN
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatCreate_MPIAIJ"
1687 int MatCreate_MPIAIJ(Mat B)
1688 {
1689   Mat_MPIAIJ *b;
1690   int        ierr,i,size;
1691 
1692   PetscFunctionBegin;
1693   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1694 
1695   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1696   B->data         = (void*)b;
1697   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1698   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1699   B->factor       = 0;
1700   B->assembled    = PETSC_FALSE;
1701   B->mapping      = 0;
1702 
1703   B->insertmode      = NOT_SET_VALUES;
1704   b->size            = size;
1705   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1706 
1707   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1708   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1709 
1710   /* the information in the maps duplicates the information computed below, eventually
1711      we should remove the duplicate information that is not contained in the maps */
1712   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1713   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1714 
1715   /* build local table of row and column ownerships */
1716   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1717   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1718   b->cowners = b->rowners + b->size + 2;
1719   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1720   b->rowners[0] = 0;
1721   for (i=2; i<=b->size; i++) {
1722     b->rowners[i] += b->rowners[i-1];
1723   }
1724   b->rstart = b->rowners[b->rank];
1725   b->rend   = b->rowners[b->rank+1];
1726   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1727   b->cowners[0] = 0;
1728   for (i=2; i<=b->size; i++) {
1729     b->cowners[i] += b->cowners[i-1];
1730   }
1731   b->cstart = b->cowners[b->rank];
1732   b->cend   = b->cowners[b->rank+1];
1733 
1734   /* build cache for off array entries formed */
1735   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1736   b->donotstash  = PETSC_FALSE;
1737   b->colmap      = 0;
1738   b->garray      = 0;
1739   b->roworiented = PETSC_TRUE;
1740 
1741   /* stuff used for matrix vector multiply */
1742   b->lvec      = PETSC_NULL;
1743   b->Mvctx     = PETSC_NULL;
1744 
1745   /* stuff for MatGetRow() */
1746   b->rowindices   = 0;
1747   b->rowvalues    = 0;
1748   b->getrowactive = PETSC_FALSE;
1749 
1750   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1751                                      "MatStoreValues_MPIAIJ",
1752                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1753   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1754                                      "MatRetrieveValues_MPIAIJ",
1755                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1756   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1757                                      "MatGetDiagonalBlock_MPIAIJ",
1758                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1759 
1760   PetscFunctionReturn(0);
1761 }
1762 EXTERN_C_END
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1766 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1767 {
1768   Mat        mat;
1769   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1770   int        ierr;
1771 
1772   PetscFunctionBegin;
1773   *newmat       = 0;
1774   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1775   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1776   a    = (Mat_MPIAIJ*)mat->data;
1777   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1778   mat->factor       = matin->factor;
1779   mat->assembled    = PETSC_TRUE;
1780   mat->insertmode   = NOT_SET_VALUES;
1781   mat->preallocated = PETSC_TRUE;
1782 
1783   a->rstart       = oldmat->rstart;
1784   a->rend         = oldmat->rend;
1785   a->cstart       = oldmat->cstart;
1786   a->cend         = oldmat->cend;
1787   a->size         = oldmat->size;
1788   a->rank         = oldmat->rank;
1789   a->donotstash   = oldmat->donotstash;
1790   a->roworiented  = oldmat->roworiented;
1791   a->rowindices   = 0;
1792   a->rowvalues    = 0;
1793   a->getrowactive = PETSC_FALSE;
1794 
1795   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1796   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1797   if (oldmat->colmap) {
1798 #if defined (PETSC_USE_CTABLE)
1799     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1800 #else
1801     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1802     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1803     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1804 #endif
1805   } else a->colmap = 0;
1806   if (oldmat->garray) {
1807     int len;
1808     len  = oldmat->B->n;
1809     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1810     PetscLogObjectMemory(mat,len*sizeof(int));
1811     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1812   } else a->garray = 0;
1813 
1814   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1815   PetscLogObjectParent(mat,a->lvec);
1816   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1817   PetscLogObjectParent(mat,a->Mvctx);
1818   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1819   PetscLogObjectParent(mat,a->A);
1820   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1821   PetscLogObjectParent(mat,a->B);
1822   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1823   *newmat = mat;
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 #include "petscsys.h"
1828 
1829 EXTERN_C_BEGIN
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatLoad_MPIAIJ"
1832 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1833 {
1834   Mat          A;
1835   PetscScalar  *vals,*svals;
1836   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1837   MPI_Status   status;
1838   int          i,nz,ierr,j,rstart,rend,fd;
1839   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1840   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1841   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1842 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST)
1843   PetscTruth   flag;
1844 #endif
1845 
1846   PetscFunctionBegin;
1847   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1848   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1849   if (!rank) {
1850     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1851     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1852     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1853     if (header[3] < 0) {
1854       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1855     }
1856   }
1857 
1858   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1859   M = header[1]; N = header[2];
1860   /* determine ownership of all rows */
1861   m = M/size + ((M % size) > rank);
1862   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1863   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1864   rowners[0] = 0;
1865   for (i=2; i<=size; i++) {
1866     rowners[i] += rowners[i-1];
1867   }
1868   rstart = rowners[rank];
1869   rend   = rowners[rank+1];
1870 
1871   /* distribute row lengths to all processors */
1872   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1873   offlens = ourlens + (rend-rstart);
1874   if (!rank) {
1875     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1876     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1877     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1878     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1879     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1880     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1881   } else {
1882     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1883   }
1884 
1885   if (!rank) {
1886     /* calculate the number of nonzeros on each processor */
1887     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1888     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1889     for (i=0; i<size; i++) {
1890       for (j=rowners[i]; j< rowners[i+1]; j++) {
1891         procsnz[i] += rowlengths[j];
1892       }
1893     }
1894     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1895 
1896     /* determine max buffer needed and allocate it */
1897     maxnz = 0;
1898     for (i=0; i<size; i++) {
1899       maxnz = PetscMax(maxnz,procsnz[i]);
1900     }
1901     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1902 
1903     /* read in my part of the matrix column indices  */
1904     nz   = procsnz[0];
1905     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1906     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1907 
1908     /* read in everyone else's and ship them off */
1909     for (i=1; i<size; i++) {
1910       nz   = procsnz[i];
1911       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1912       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1913     }
1914     ierr = PetscFree(cols);CHKERRQ(ierr);
1915   } else {
1916     /* determine buffer space needed for message */
1917     nz = 0;
1918     for (i=0; i<m; i++) {
1919       nz += ourlens[i];
1920     }
1921     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1922 
1923     /* receive message of column indices*/
1924     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1925     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1926     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1927   }
1928 
1929   /* determine column ownership if matrix is not square */
1930   if (N != M) {
1931     n      = N/size + ((N % size) > rank);
1932     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1933     cstart = cend - n;
1934   } else {
1935     cstart = rstart;
1936     cend   = rend;
1937     n      = cend - cstart;
1938   }
1939 
1940   /* loop over local rows, determining number of off diagonal entries */
1941   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1942   jj = 0;
1943   for (i=0; i<m; i++) {
1944     for (j=0; j<ourlens[i]; j++) {
1945       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1946       jj++;
1947     }
1948   }
1949 
1950   /* create our matrix */
1951   for (i=0; i<m; i++) {
1952     ourlens[i] -= offlens[i];
1953   }
1954   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1955   A = *newmat;
1956   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1957   for (i=0; i<m; i++) {
1958     ourlens[i] += offlens[i];
1959   }
1960 
1961   if (!rank) {
1962     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1963 
1964     /* read in my part of the matrix numerical values  */
1965     nz   = procsnz[0];
1966     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1967 
1968     /* insert into matrix */
1969     jj      = rstart;
1970     smycols = mycols;
1971     svals   = vals;
1972     for (i=0; i<m; i++) {
1973       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1974       smycols += ourlens[i];
1975       svals   += ourlens[i];
1976       jj++;
1977     }
1978 
1979     /* read in other processors and ship out */
1980     for (i=1; i<size; i++) {
1981       nz   = procsnz[i];
1982       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1983       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1984     }
1985     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1986   } else {
1987     /* receive numeric values */
1988     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1989 
1990     /* receive message of values*/
1991     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1992     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1993     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1994 
1995     /* insert into matrix */
1996     jj      = rstart;
1997     smycols = mycols;
1998     svals   = vals;
1999     for (i=0; i<m; i++) {
2000       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2001       smycols += ourlens[i];
2002       svals   += ourlens[i];
2003       jj++;
2004     }
2005   }
2006   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2007   ierr = PetscFree(vals);CHKERRQ(ierr);
2008   ierr = PetscFree(mycols);CHKERRQ(ierr);
2009   ierr = PetscFree(rowners);CHKERRQ(ierr);
2010 
2011   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2012   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2013 #if defined(PETSC_HAVE_SPOOLES)
2014   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2015   if (flag) {
2016     if (size == 1) {
2017       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2018     } else {
2019       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2020     }
2021   }
2022 #endif
2023 #if defined(PETSC_HAVE_SUPERLUDIST)
2024   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2025   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2026 #endif
2027   PetscFunctionReturn(0);
2028 }
2029 EXTERN_C_END
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2033 /*
2034     Not great since it makes two copies of the submatrix: first a SeqAIJ
2035   on each process, and then the final parallel matrix obtained by concatenating the local pieces.
2036   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2037 */
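/*
    A rough usage sketch (isrow and iscol are hypothetical index sets; this routine is
  normally reached through MatGetSubMatrix(), and the csize argument plays the same role
  as the local column size 'n' in MatCreateMPIAIJ()):

      Mat sub;
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
          (change the numerical values of mat, keeping its nonzero pattern)
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/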
2038 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2039 {
2040   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2041   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2042   Mat          *local,M,Mreuse;
2043   PetscScalar  *vwork,*aa;
2044   MPI_Comm     comm = mat->comm;
2045   Mat_SeqAIJ   *aij;
2046 
2047 
2048   PetscFunctionBegin;
2049   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2050   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2051 
2052   if (call ==  MAT_REUSE_MATRIX) {
2053     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2054     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2055     local = &Mreuse;
2056     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2057   } else {
2058     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2059     Mreuse = *local;
2060     ierr   = PetscFree(local);CHKERRQ(ierr);
2061   }
2062 
2063   /*
2064       m - number of local rows
2065       n - number of columns (same on all processors)
2066       rstart - first row in new global matrix generated
2067   */
2068   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2069   if (call == MAT_INITIAL_MATRIX) {
2070     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2071     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2072     ii  = aij->i;
2073     jj  = aij->j;
2074 
2075     /*
2076         Determine the number of non-zeros in the diagonal and off-diagonal
2077         portions of the matrix in order to do correct preallocation
2078     */
2079 
2080     /* first get start and end of "diagonal" columns */
2081     if (csize == PETSC_DECIDE) {
2082       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2083       if (mglobal == n) { /* square matrix */
2084 	nlocal = m;
2085       } else {
2086         nlocal = n/size + ((n % size) > rank);
2087       }
2088     } else {
2089       nlocal = csize;
2090     }
2091     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2092     rstart = rend - nlocal;
2093     if (rank == size - 1 && rend != n) {
2094       SETERRQ(1,"Local column sizes do not add up to total number of columns");
2095     }
2096 
2097     /* next, compute all the lengths */
2098     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2099     olens = dlens + m;
2100     for (i=0; i<m; i++) {
2101       jend = ii[i+1] - ii[i];
2102       olen = 0;
2103       dlen = 0;
2104       for (j=0; j<jend; j++) {
2105         if (*jj < rstart || *jj >= rend) olen++;
2106         else dlen++;
2107         jj++;
2108       }
2109       olens[i] = olen;
2110       dlens[i] = dlen;
2111     }
2112     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2113     ierr = PetscFree(dlens);CHKERRQ(ierr);
2114   } else {
2115     int ml,nl;
2116 
2117     M = *newmat;
2118     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2119     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2120     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2121     /*
2122          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2123        rather than the slower MatSetValues().
2124     */
2125     M->was_assembled = PETSC_TRUE;
2126     M->assembled     = PETSC_FALSE;
2127   }
2128   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2129   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2130   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2131   ii  = aij->i;
2132   jj  = aij->j;
2133   aa  = aij->a;
2134   for (i=0; i<m; i++) {
2135     row   = rstart + i;
2136     nz    = ii[i+1] - ii[i];
2137     cwork = jj;     jj += nz;
2138     vwork = aa;     aa += nz;
2139     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2140   }
2141 
2142   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2143   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2144   *newmat = M;
2145 
2146   /* save submatrix used in processor for next request */
2147   if (call ==  MAT_INITIAL_MATRIX) {
2148     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2149     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2150   }
2151 
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2157 /*@C
2158    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2159    (the default parallel PETSc format).  For good matrix assembly performance
2160    the user should preallocate the matrix storage by setting the parameters
2161    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2162    performance can be increased by more than a factor of 50.
2163 
2164    Collective on MPI_Comm
2165 
2166    Input Parameters:
2167 +  A - the matrix
2168 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2169            (same value is used for all local rows)
2170 .  d_nnz - array containing the number of nonzeros in the various rows of the
2171            DIAGONAL portion of the local submatrix (possibly different for each row)
2172            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2173            The size of this array is equal to the number of local rows, i.e 'm'.
2174            You must leave room for the diagonal entry even if it is zero.
2175 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2176            submatrix (same value is used for all local rows).
2177 -  o_nnz - array containing the number of nonzeros in the various rows of the
2178            OFF-DIAGONAL portion of the local submatrix (possibly different for
2179            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2180            structure. The size of this array is equal to the number
2181            of local rows, i.e 'm'.
2182 
2183    The AIJ format (also called the Yale sparse matrix format or
2184    compressed row storage), is fully compatible with standard Fortran 77
2185    storage.  That is, the stored row and column indices can begin at
2186    either one (as in Fortran) or zero.  See the users manual for details.
2187 
2188    The user MUST specify either the local or global matrix dimensions
2189    (possibly both).
2190 
2191    The parallel matrix is partitioned such that the first m0 rows belong to
2192    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2193    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2194 
2195    The DIAGONAL portion of the local submatrix of a processor can be defined
2196    as the submatrix which is obtained by extracting the part corresponding
2197    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2198    first row that belongs to the processor, and r2 is the last row belonging
2199    to this processor. This is a square mxm matrix. The remaining portion
2200    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2201 
2202    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2203 
2204    By default, this format uses inodes (identical nodes) when possible.
2205    We search for consecutive rows with the same nonzero structure, thereby
2206    reusing matrix information to achieve increased efficiency.
2207 
2208    Options Database Keys:
2209 +  -mat_aij_no_inode  - Do not use inodes
2210 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2211 -  -mat_aij_oneindex - Internally use indexing starting at 1
2212         rather than 0.  Note that when calling MatSetValues(),
2213         the user still MUST index entries starting at 0!
2214 
2215    Example usage:
2216 
2217    Consider the following 8x8 matrix with 34 non-zero values, that is
2218    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2219    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2220    as follows:
2221 
2222 .vb
2223             1  2  0  |  0  3  0  |  0  4
2224     Proc0   0  5  6  |  7  0  0  |  8  0
2225             9  0 10  | 11  0  0  | 12  0
2226     -------------------------------------
2227            13  0 14  | 15 16 17  |  0  0
2228     Proc1   0 18  0  | 19 20 21  |  0  0
2229             0  0  0  | 22 23  0  | 24  0
2230     -------------------------------------
2231     Proc2  25 26 27  |  0  0 28  | 29  0
2232            30  0  0  | 31 32 33  |  0 34
2233 .ve
2234 
2235    This can be represented as a collection of submatrices as:
2236 
2237 .vb
2238       A B C
2239       D E F
2240       G H I
2241 .ve
2242 
2243    Where the submatrices A,B,C are owned by proc0, D,E,F are
2244    owned by proc1, G,H,I are owned by proc2.
2245 
2246    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2247    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2248    The 'M','N' parameters are 8,8, and have the same values on all procs.
2249 
2250    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2251    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2252    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2253    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2254    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2255    matrix, and [DF] as another SeqAIJ matrix.
2256 
2257    When d_nz, o_nz parameters are specified, d_nz storage elements are
2258    allocated for every row of the local diagonal submatrix, and o_nz
2259    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2260    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2261    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2262    In this case, the values of d_nz,o_nz are:
2263 .vb
2264      proc0 : dnz = 2, o_nz = 2
2265      proc1 : dnz = 3, o_nz = 2
2266      proc2 : dnz = 1, o_nz = 4
2267 .ve
2268    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2269    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2270    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2271    34 values.
2272 
2273    When d_nnz, o_nnz parameters are specified, the storage is specified
2274    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2275    In the above case the values for d_nnz,o_nnz are:
2276 .vb
2277      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2278      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2279      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2280 .ve
2281    Here the space allocated is the sum of all the above values, i.e. 34, and
2282    hence pre-allocation is perfect.
2283 
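   As a sketch (not a complete program) of how these values might be supplied, each
   process passes its own local sizes and nnz arrays; shown here for proc1 of the example
   above. Because d_nnz and o_nnz are given, the d_nz and o_nz arguments are ignored and
   passed as 0:

.vb
     Mat A;
     int d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};   /* the proc1 values from above */

     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
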
2284    Level: intermediate
2285 
2286 .keywords: matrix, aij, compressed row, sparse, parallel
2287 
2288 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2289 @*/
2290 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2291 {
2292   Mat_MPIAIJ   *b;
2293   int          ierr,i;
2294   PetscTruth   flg2;
2295 
2296   PetscFunctionBegin;
2297   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2298   if (!flg2) PetscFunctionReturn(0);
2299   B->preallocated = PETSC_TRUE;
2300   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2301   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2302   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2303   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2304   if (d_nnz) {
2305     for (i=0; i<B->m; i++) {
2306       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2307     }
2308   }
2309   if (o_nnz) {
2310     for (i=0; i<B->m; i++) {
2311       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2312     }
2313   }
2314   b = (Mat_MPIAIJ*)B->data;
2315 
2316   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2317   PetscLogObjectParent(B,b->A);
2318   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2319   PetscLogObjectParent(B,b->B);
2320 
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatCreateMPIAIJ"
2326 /*@C
2327    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2328    (the default parallel PETSc format).  For good matrix assembly performance
2329    the user should preallocate the matrix storage by setting the parameters
2330    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2331    performance can be increased by more than a factor of 50.
2332 
2333    Collective on MPI_Comm
2334 
2335    Input Parameters:
2336 +  comm - MPI communicator
2337 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2338            This value should be the same as the local size used in creating the
2339            y vector for the matrix-vector product y = Ax.
2340 .  n - This value should be the same as the local size used in creating the
2341        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2342        calculated if N is given) For square matrices n is almost always m.
2343 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2344 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2345 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2346            (same value is used for all local rows)
2347 .  d_nnz - array containing the number of nonzeros in the various rows of the
2348            DIAGONAL portion of the local submatrix (possibly different for each row)
2349            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2350            The size of this array is equal to the number of local rows, i.e 'm'.
2351            You must leave room for the diagonal entry even if it is zero.
2352 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2353            submatrix (same value is used for all local rows).
2354 -  o_nnz - array containing the number of nonzeros in the various rows of the
2355            OFF-DIAGONAL portion of the local submatrix (possibly different for
2356            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2357            structure. The size of this array is equal to the number
2358            of local rows, i.e 'm'.
2359 
2360    Output Parameter:
2361 .  A - the matrix
2362 
2363    Notes:
2364    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2365    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2366    storage requirements for this matrix.
2367 
2368    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2369    processor then it must be used on all processors that share the object for
2370    that argument.
2371 
2372    The AIJ format (also called the Yale sparse matrix format or
2373    compressed row storage), is fully compatible with standard Fortran 77
2374    storage.  That is, the stored row and column indices can begin at
2375    either one (as in Fortran) or zero.  See the users manual for details.
2376 
2377    The user MUST specify either the local or global matrix dimensions
2378    (possibly both).
2379 
2380    The parallel matrix is partitioned such that the first m0 rows belong to
2381    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2382    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2383 
2384    The DIAGONAL portion of the local submatrix of a processor can be defined
2385    as the submatrix which is obtained by extracting the part corresponding
2386    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2387    first row that belongs to the processor, and r2 is the last row belonging
2388    to this processor. This is a square mxm matrix. The remaining portion
2389    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2390 
2391    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2392 
2393    By default, this format uses inodes (identical nodes) when possible.
2394    We search for consecutive rows with the same nonzero structure, thereby
2395    reusing matrix information to achieve increased efficiency.
2396 
2397    Options Database Keys:
2398 +  -mat_aij_no_inode  - Do not use inodes
2399 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2400 -  -mat_aij_oneindex - Internally use indexing starting at 1
2401         rather than 0.  Note that when calling MatSetValues(),
2402         the user still MUST index entries starting at 0!
2403 
2404 
2405    Example usage:
2406 
2407    Consider the following 8x8 matrix with 34 non-zero values, that is
2408    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2409    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2410    as follows:
2411 
2412 .vb
2413             1  2  0  |  0  3  0  |  0  4
2414     Proc0   0  5  6  |  7  0  0  |  8  0
2415             9  0 10  | 11  0  0  | 12  0
2416     -------------------------------------
2417            13  0 14  | 15 16 17  |  0  0
2418     Proc1   0 18  0  | 19 20 21  |  0  0
2419             0  0  0  | 22 23  0  | 24  0
2420     -------------------------------------
2421     Proc2  25 26 27  |  0  0 28  | 29  0
2422            30  0  0  | 31 32 33  |  0 34
2423 .ve
2424 
2425    This can be represented as a collection of submatrices as:
2426 
2427 .vb
2428       A B C
2429       D E F
2430       G H I
2431 .ve
2432 
2433    Where the submatrices A,B,C are owned by proc0, D,E,F are
2434    owned by proc1, G,H,I are owned by proc2.
2435 
2436    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2437    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2438    The 'M','N' parameters are 8,8, and have the same values on all procs.
2439 
2440    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2441    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2442    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2443    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2444    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2445    matrix, and [DF] as another SeqAIJ matrix.
2446 
2447    When d_nz, o_nz parameters are specified, d_nz storage elements are
2448    allocated for every row of the local diagonal submatrix, and o_nz
2449    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2450    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2451    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2452    In this case, the values of d_nz,o_nz are:
2453 .vb
2454      proc0 : dnz = 2, o_nz = 2
2455      proc1 : dnz = 3, o_nz = 2
2456      proc2 : dnz = 1, o_nz = 4
2457 .ve
2458    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2459    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2460    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2461    34 values.
2462 
2463    When d_nnz, o_nnz parameters are specified, the storage is specified
2464    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2465    In the above case the values for d_nnz,o_nnz are:
2466 .vb
2467      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2468      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2469      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2470 .ve
2471    Here the space allocated is the sum of all the above values, i.e. 34, and
2472    hence pre-allocation is perfect.
2473 
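   A sketch of the corresponding call for proc1 of the example above (each process passes
   its own local sizes and nnz arrays; because d_nnz and o_nnz are given, the d_nz and
   o_nz arguments are ignored and passed as 0):

.vb
     Mat A;
     int d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};

     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
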
2474    Level: intermediate
2475 
2476 .keywords: matrix, aij, compressed row, sparse, parallel
2477 
2478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2479 @*/
2480 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2481 {
2482   int ierr,size;
2483 
2484   PetscFunctionBegin;
2485   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2486   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2487   if (size > 1) {
2488     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2489     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2490   } else {
2491     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2492     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2493   }
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 #undef __FUNCT__
2498 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2499 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2500 {
2501   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2502   PetscFunctionBegin;
2503   *Ad     = a->A;
2504   *Ao     = a->B;
2505   *colmap = a->garray;
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 #undef __FUNCT__
2510 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2511 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2512 {
2513   int        ierr,i;
2514   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2515 
2516   PetscFunctionBegin;
2517   if (coloring->ctype == IS_COLORING_LOCAL) {
2518     ISColoringValue *allcolors,*colors;
2519     ISColoring      ocoloring;
2520 
2521     /* set coloring for diagonal portion */
2522     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2523 
2524     /* set coloring for off-diagonal portion */
2525     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2526     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2527     for (i=0; i<a->B->n; i++) {
2528       colors[i] = allcolors[a->garray[i]];
2529     }
2530     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2531     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2532     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2533     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2534   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2535     ISColoringValue *colors;
2536     int             *larray;
2537     ISColoring      ocoloring;
2538 
2539     /* set coloring for diagonal portion */
2540     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2541     for (i=0; i<a->A->n; i++) {
2542       larray[i] = i + a->cstart;
2543     }
2544     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2545     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2546     for (i=0; i<a->A->n; i++) {
2547       colors[i] = coloring->colors[larray[i]];
2548     }
2549     ierr = PetscFree(larray);CHKERRQ(ierr);
2550     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2551     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2552     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2553 
2554     /* set coloring for off-diagonal portion */
2555     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2556     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2557     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2558     for (i=0; i<a->B->n; i++) {
2559       colors[i] = coloring->colors[larray[i]];
2560     }
2561     ierr = PetscFree(larray);CHKERRQ(ierr);
2562     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2563     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2564     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2565   } else {
2566     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2567   }
2568 
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 #undef __FUNCT__
2573 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2574 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2575 {
2576   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2577   int        ierr;
2578 
2579   PetscFunctionBegin;
2580   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2581   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 #undef __FUNCT__
2586 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2587 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2588 {
2589   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2590   int        ierr;
2591 
2592   PetscFunctionBegin;
2593   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2594   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 #undef __FUNCT__
2599 #define __FUNCT__ "MatMerge"
2600 /*@C
2601       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2602                  matrices from each processor
2603 
2604     Collective on MPI_Comm
2605 
2606    Input Parameters:
2607 +    comm - the communicator the parallel matrix will live on
2608 -    inmat - the input sequential matrices
2609 
2610    Output Parameter:
2611 .    outmat - the parallel matrix generated
2612 
2613     Level: advanced
2614 
2615    Notes: The number of columns of the input sequential matrix on EACH of the separate
2616       processes MUST be the same.
2617 
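   A minimal calling sketch (seqmat is a hypothetical sequential matrix already assembled
   on each process; note that this routine destroys the input sequential matrix):

.vb
     Mat merged;
     ierr = MatMerge(comm,seqmat,&merged);CHKERRQ(ierr);
.ve
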
2618 @*/
2619 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2620 {
2621   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2622   PetscScalar *values;
2623   PetscMap    columnmap,rowmap;
2624 
2625   PetscFunctionBegin;
2626 
2627   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2628 
2629   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2630   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2631   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2632   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2633   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2634   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2635 
2636   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2637   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2638   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2639   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2640   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2641 
2642   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2643   for (i=0;i<m;i++) {
2644     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2645     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2646     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2647   }
2648   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2649   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2650 
2651   for (i=0;i<m;i++) {
2652     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2653     I    = i + rstart;
2654     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2655     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2656   }
2657   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2658   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2659   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2660 
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatFileSplit"
2666 int MatFileSplit(Mat A,char *outfile)
2667 {
2668   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2669   PetscViewer out;
2670   char        *name;
2671   Mat         B;
2672   PetscScalar *values;
2673 
2674   PetscFunctionBegin;
2675 
2676   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2677   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2678   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2679   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2680   for (i=0;i<m;i++) {
2681     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2682     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2683     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2684   }
2685   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2686   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2687 
2688   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2689   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2690   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2691   sprintf(name,"%s.%d",outfile,rank);
2692   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2693   ierr = PetscFree(name);CHKERRQ(ierr);
2694   ierr = MatView(B,out);CHKERRQ(ierr);
2695   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2696   ierr = MatDestroy(B);CHKERRQ(ierr);
2697   PetscFunctionReturn(0);
2698 }
2699