xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f4cce38b91551f24ef643c967e2de7c2d2909fa1)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
18 a slightly higher hash table lookup cost; without it, it is not scalable (each
19 processor has an order N integer array) but is fast to access.
20 */
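/*
  A small worked example (hypothetical sizes): if the off-diagonal block B has
  three local columns corresponding to global columns garray[] = {3,7,12}, the
  colmap built below satisfies colmap[3] = 1, colmap[7] = 2, colmap[12] = 3 and
  is zero elsewhere, so looking up global column c gives local column colmap[c]-1,
  with -1 meaning "not present".  The PETSC_USE_CTABLE variant stores the same
  mapping (key c+1, value local+1) in a hash table instead of a dense length-N
  array.
*/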
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
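/*
  The two macros below insert a single (row,col,value) entry into the local
  "diagonal" block A or "off-diagonal" block B; they are essentially inlined
  versions of MatSetValues_SeqAIJ() for one value (see the commented-out calls
  in MatSetValues_MPIAIJ() below).  Each does a coarse binary search followed by
  a linear scan to locate the column within the row; if the row has no free slot,
  the entire local storage is reallocated with CHUNKSIZE extra entries opened up
  after the current row and the row offsets of all later rows are shifted by
  CHUNKSIZE.  The nonew flag controls new nonzeros: 1 silently drops them, -1
  errors on any new nonzero, and -2 errors only when a reallocation would be
  required.
*/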
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col1) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col1) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
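/*
  MatSetValues for the parallel AIJ format.  For rows owned by this process,
  entries whose global column lies in [cstart,cend) go into the "diagonal"
  block aij->A; all other columns are translated through colmap/garray and go
  into the "off-diagonal" block aij->B.  Rows owned by other processes are
  copied into the stash and communicated during assembly (unless the user set
  MAT_IGNORE_OFF_PROC_ENTRIES).  Negative row or column indices are ignored.

  A minimal usage sketch (hypothetical sizes and indices):

     ierr = MatCreateMPIAIJ(comm,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&mat);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/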
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the insertion macros above */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are using the same insert mode: either INSERT_VALUES or ADD_VALUES */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
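/*
  MatAssemblyEnd receives the stashed off-process entries and inserts them with
  MatSetValues_MPIAIJ(), assembles the diagonal block, uses an allreduce to
  detect whether any process disassembled its off-diagonal block (and, if so,
  disassembles locally as well so all processes stay consistent), builds the
  ghost-column scatter on the first final assembly via MatSetUpMultiply_MPIAIJ(),
  and finally assembles the off-diagonal block.
*/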
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
353   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled; if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if the nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled, so we can skip this check
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412 #if defined(PETSC_HAVE_SUPERLUDIST)
413   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
414   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
415 #endif
416 
417 #if defined(PETSC_HAVE_SPOOLES)
418   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
419   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
420 #endif
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
426 int MatZeroEntries_MPIAIJ(Mat A)
427 {
428   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
429   int        ierr;
430 
431   PetscFunctionBegin;
432   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
433   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
434   PetscFunctionReturn(0);
435 }
436 
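/*
  MatZeroRows for the parallel format: each process determines the owner of
  every global row index it was handed, ships the indices to the owners with
  nonblocking sends/receives, gathers the rows it owns itself into lrows, and
  then zeros those local rows, optionally placing *diag on the diagonal.
*/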
437 #undef __FUNCT__
438 #define __FUNCT__ "MatZeroRows_MPIAIJ"
439 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
440 {
441   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
442   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
443   int            *nprocs,j,idx,nsends,row;
444   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
445   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
446   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
447   MPI_Comm       comm = A->comm;
448   MPI_Request    *send_waits,*recv_waits;
449   MPI_Status     recv_status,*send_status;
450   IS             istmp;
451   PetscTruth     found;
452 
453   PetscFunctionBegin;
454   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
455   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
456 
457   /*  first count number of contributors to each processor */
458   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
459   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
460   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
461   for (i=0; i<N; i++) {
462     idx = rows[i];
463     found = PETSC_FALSE;
464     for (j=0; j<size; j++) {
465       if (idx >= owners[j] && idx < owners[j+1]) {
466         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
467       }
468     }
469     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
470   }
471   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
472 
473   /* inform other processors of number of messages and max length*/
474   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
475 
476   /* post receives:   */
477   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
478   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
479   for (i=0; i<nrecvs; i++) {
480     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
481   }
482 
483   /* do sends:
484       1) starts[i] gives the starting index in svalues for stuff going to
485          the ith processor
486   */
487   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
488   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
489   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
490   starts[0] = 0;
491   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
492   for (i=0; i<N; i++) {
493     svalues[starts[owner[i]]++] = rows[i];
494   }
495   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
496 
497   starts[0] = 0;
498   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
499   count = 0;
500   for (i=0; i<size; i++) {
501     if (nprocs[2*i+1]) {
502       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
503     }
504   }
505   ierr = PetscFree(starts);CHKERRQ(ierr);
506 
507   base = owners[rank];
508 
509   /*  wait on receives */
510   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
511   source = lens + nrecvs;
512   count  = nrecvs; slen = 0;
513   while (count) {
514     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
515     /* unpack receives into our local space */
516     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
517     source[imdex]  = recv_status.MPI_SOURCE;
518     lens[imdex]    = n;
519     slen          += n;
520     count--;
521   }
522   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
523 
524   /* move the data into the send scatter */
525   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
526   count = 0;
527   for (i=0; i<nrecvs; i++) {
528     values = rvalues + i*nmax;
529     for (j=0; j<lens[i]; j++) {
530       lrows[count++] = values[j] - base;
531     }
532   }
533   ierr = PetscFree(rvalues);CHKERRQ(ierr);
534   ierr = PetscFree(lens);CHKERRQ(ierr);
535   ierr = PetscFree(owner);CHKERRQ(ierr);
536   ierr = PetscFree(nprocs);CHKERRQ(ierr);
537 
538   /* actually zap the local rows */
539   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
540   PetscLogObjectParent(A,istmp);
541 
542   /*
543         Zero the required rows. If the "diagonal block" of the matrix
544      is square and the user wishes to set the diagonal, we use separate
545      code so that MatSetValues() is not called for each diagonal entry
546      (which would allocate new memory, causing many mallocs and slowing things down).
547 
548        Contributed by: Mathew Knepley
549   */
550   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
551   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
552   if (diag && (l->A->M == l->A->N)) {
553     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
554   } else if (diag) {
555     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
556     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
557       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
558 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
559     }
560     for (i = 0; i < slen; i++) {
561       row  = lrows[i] + rstart;
562       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
563     }
564     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
565     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
566   } else {
567     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
568   }
569   ierr = ISDestroy(istmp);CHKERRQ(ierr);
570   ierr = PetscFree(lrows);CHKERRQ(ierr);
571 
572   /* wait on sends */
573   if (nsends) {
574     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
575     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
576     ierr = PetscFree(send_status);CHKERRQ(ierr);
577   }
578   ierr = PetscFree(send_waits);CHKERRQ(ierr);
579   ierr = PetscFree(svalues);CHKERRQ(ierr);
580 
581   PetscFunctionReturn(0);
582 }
583 
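/*
  y = A*x: after checking that x has the right local size, start the scatter
  that gathers the ghost values of x needed by the off-diagonal block, multiply
  by the diagonal block while that communication is in flight, complete the
  scatter, and add in the off-diagonal contribution; in effect
  y = A_diag*x_local + B_offdiag*x_ghost.
*/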
584 #undef __FUNCT__
585 #define __FUNCT__ "MatMult_MPIAIJ"
586 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
587 {
588   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
589   int        ierr,nt;
590 
591   PetscFunctionBegin;
592   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
593   if (nt != A->n) {
594     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
595   }
596   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
597   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
598   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
602 
603 #undef __FUNCT__
604 #define __FUNCT__ "MatMultAdd_MPIAIJ"
605 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
606 {
607   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
608   int        ierr;
609 
610   PetscFunctionBegin;
611   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
612   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
613   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
614   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
615   PetscFunctionReturn(0);
616 }
617 
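/*
  y = A'*x: the off-diagonal block is applied first and its result is scattered
  back in reverse (with ADD_VALUES) onto the owning processes while the local
  transpose multiply with the diagonal block proceeds; the scatter end then
  accumulates the remote contributions into the result.
*/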
618 #undef __FUNCT__
619 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
620 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
621 {
622   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
623   int        ierr;
624 
625   PetscFunctionBegin;
626   /* do nondiagonal part */
627   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
628   /* send it on its way */
629   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   /* do local part */
631   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
632   /* receive remote parts: note this assumes the values are not actually */
633   /* inserted into yy until the next line; this is true for this implementation */
634   /* but may not always be true. */
635   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
636   PetscFunctionReturn(0);
637 }
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
641 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
642 {
643   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
644   int        ierr;
645 
646   PetscFunctionBegin;
647   /* do nondiagonal part */
648   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
649   /* send it on its way */
650   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
651   /* do local part */
652   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
653   /* receive remote parts: note this assumes the values are not actually */
654   /* inserted into zz until the next line; this is true for this implementation */
655   /* but may not always be true. */
656   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
657   PetscFunctionReturn(0);
658 }
659 
660 /*
661   This only works correctly for square matrices where the subblock A->A is the
662    diagonal block
663 */
664 #undef __FUNCT__
665 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
666 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
667 {
668   int        ierr;
669   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
670 
671   PetscFunctionBegin;
672   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
673   if (a->rstart != a->cstart || a->rend != a->cend) {
674     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
675   }
676   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatScale_MPIAIJ"
682 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
683 {
684   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
685   int        ierr;
686 
687   PetscFunctionBegin;
688   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
689   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
690   PetscFunctionReturn(0);
691 }
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatDestroy_MPIAIJ"
695 int MatDestroy_MPIAIJ(Mat mat)
696 {
697   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
698   int        ierr;
699 
700   PetscFunctionBegin;
701 #if defined(PETSC_USE_LOG)
702   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
703 #endif
704   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
705   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
706   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
707   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
708 #if defined (PETSC_USE_CTABLE)
709   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
710 #else
711   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
712 #endif
713   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
714   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
715   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
716   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
717   ierr = PetscFree(aij);CHKERRQ(ierr);
718   PetscFunctionReturn(0);
719 }
720 
721 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
722 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
723 
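/*
  Writes the parallel matrix to a PETSc binary viewer.  Process 0 writes the
  header (cookie, M, N, global nonzero count), then the row lengths, column
  indices, and values of every process in rank order; the other processes send
  their pieces to process 0 with point-to-point messages.  Within each row the
  off-diagonal columns below cstart come first, then the diagonal block, then
  the remaining off-diagonal columns, so each row is stored in global column
  order.
*/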
724 #undef __FUNCT__
725 #define __FUNCT__ "MatView_MPIAIJ_Binary"
726 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
727 {
728   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
729   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
730   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
731   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
732   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
733   PetscScalar       *column_values;
734 
735   PetscFunctionBegin;
736   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
737   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
738   nz   = A->nz + B->nz;
739   if (rank == 0) {
740     header[0] = MAT_FILE_COOKIE;
741     header[1] = mat->M;
742     header[2] = mat->N;
743     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
744     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
745     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
746     /* get largest number of rows any processor has */
747     rlen = mat->m;
748     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
749     for (i=1; i<size; i++) {
750       rlen = PetscMax(rlen,range[i+1] - range[i]);
751     }
752   } else {
753     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
754     rlen = mat->m;
755   }
756 
757   /* load up the local row counts */
758   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
759   for (i=0; i<mat->m; i++) {
760     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
761   }
762 
763   /* store the row lengths to the file */
764   if (rank == 0) {
765     MPI_Status status;
766     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
767     for (i=1; i<size; i++) {
768       rlen = range[i+1] - range[i];
769       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
770       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
771     }
772   } else {
773     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
774   }
775   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
776 
777   /* load up the local column indices */
778   nzmax = nz; /* process 0 needs as much space as the largest process needs */
779   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
780   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
781   cnt  = 0;
782   for (i=0; i<mat->m; i++) {
783     for (j=B->i[i]; j<B->i[i+1]; j++) {
784       if ( (col = garray[B->j[j]]) > cstart) break;
785       column_indices[cnt++] = col;
786     }
787     for (k=A->i[i]; k<A->i[i+1]; k++) {
788       column_indices[cnt++] = A->j[k] + cstart;
789     }
790     for (; j<B->i[i+1]; j++) {
791       column_indices[cnt++] = garray[B->j[j]];
792     }
793   }
794   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
795 
796   /* store the column indices to the file */
797   if (rank == 0) {
798     MPI_Status status;
799     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
800     for (i=1; i<size; i++) {
801       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
802       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
803       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
804       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
805     }
806   } else {
807     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
808     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(column_indices);CHKERRQ(ierr);
811 
812   /* load up the local column values */
813   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
814   cnt  = 0;
815   for (i=0; i<mat->m; i++) {
816     for (j=B->i[i]; j<B->i[i+1]; j++) {
817       if ( garray[B->j[j]] > cstart) break;
818       column_values[cnt++] = B->a[j];
819     }
820     for (k=A->i[i]; k<A->i[i+1]; k++) {
821       column_values[cnt++] = A->a[k];
822     }
823     for (; j<B->i[i+1]; j++) {
824       column_values[cnt++] = B->a[j];
825     }
826   }
827   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
828 
829   /* store the column values to the file */
830   if (rank == 0) {
831     MPI_Status status;
832     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
833     for (i=1; i<size; i++) {
834       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
835       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
836       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
837       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
838     }
839   } else {
840     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
841     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
842   }
843   ierr = PetscFree(column_values);CHKERRQ(ierr);
844   PetscFunctionReturn(0);
845 }
846 
847 #undef __FUNCT__
848 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
849 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
850 {
851   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
852   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
853   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
854   PetscTruth        isdraw,isascii,flg,isbinary;
855   PetscViewer       sviewer;
856   PetscViewerFormat format;
857 
858   PetscFunctionBegin;
859   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
860   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
861   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
862   if (isascii) {
863     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
864     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
865       MatInfo info;
866       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
867       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
868       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
869       if (flg) {
870         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
871 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
872       } else {
873         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
874 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
875       }
876       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
877       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
878       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
879       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
880       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
881       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
882       PetscFunctionReturn(0);
883     } else if (format == PETSC_VIEWER_ASCII_INFO) {
884       PetscFunctionReturn(0);
885     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
886 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
887       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
888 #endif
889 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
890       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
891 #endif
892 
893       PetscFunctionReturn(0);
894     }
895   } else if (isbinary) {
896     if (size == 1) {
897       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
898       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
899     } else {
900       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
901     }
902     PetscFunctionReturn(0);
903   } else if (isdraw) {
904     PetscDraw  draw;
905     PetscTruth isnull;
906     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
907     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
908   }
909 
910   if (size == 1) {
911     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
912     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
913   } else {
914     /* assemble the entire matrix onto first processor. */
915     Mat         A;
916     Mat_SeqAIJ *Aloc;
917     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
918     PetscScalar *a;
919 
920     if (!rank) {
921       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
922     } else {
923       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
924     }
925     PetscLogObjectParent(mat,A);
926 
927     /* copy over the A part */
928     Aloc = (Mat_SeqAIJ*)aij->A->data;
929     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
930     row = aij->rstart;
931     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
932     for (i=0; i<m; i++) {
933       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
934       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
935     }
936     aj = Aloc->j;
937     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
938 
939     /* copy over the B part */
940     Aloc = (Mat_SeqAIJ*)aij->B->data;
941     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
942     row  = aij->rstart;
943     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
944     ct   = cols;
945     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
946     for (i=0; i<m; i++) {
947       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
948       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
949     }
950     ierr = PetscFree(ct);CHKERRQ(ierr);
951     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
952     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
953     /*
954        Everyone has to participate in the call that draws the matrix since the graphics
955        waits are synchronized across all processors that share the PetscDraw object
956     */
957     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
958     if (!rank) {
959       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
960       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
961     }
962     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
963     ierr = MatDestroy(A);CHKERRQ(ierr);
964   }
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatView_MPIAIJ"
970 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
971 {
972   int        ierr;
973   PetscTruth isascii,isdraw,issocket,isbinary;
974 
975   PetscFunctionBegin;
976   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
977   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
978   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
979   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
980   if (isascii || isdraw || isbinary || issocket) {
981     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
982   } else {
983     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
984   }
985   PetscFunctionReturn(0);
986 }
987 
988 
989 
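/*
  Relaxation is supported only in the "local" (block Jacobi style) variants:
  each outer iteration scatters the current x to obtain the ghost values, forms
  bb1 = bb - B_offdiag*x_ghost, and runs the requested sequential sweep of the
  diagonal block with bb1 as the right-hand side.  A true parallel SOR is not
  supported and returns an error.
*/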
990 #undef __FUNCT__
991 #define __FUNCT__ "MatRelax_MPIAIJ"
992 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
993 {
994   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
995   int          ierr;
996   Vec          bb1;
997   PetscScalar  mone=-1.0;
998 
999   PetscFunctionBegin;
1000   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1001 
1002   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1003 
1004   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1005     if (flag & SOR_ZERO_INITIAL_GUESS) {
1006       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1007       its--;
1008     }
1009 
1010     while (its--) {
1011       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1012       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1013 
1014       /* update rhs: bb1 = bb - B*x */
1015       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1016       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1017 
1018       /* local sweep */
1019       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1020       CHKERRQ(ierr);
1021     }
1022   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1023     if (flag & SOR_ZERO_INITIAL_GUESS) {
1024       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1025       its--;
1026     }
1027     while (its--) {
1028       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1029       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1030 
1031       /* update rhs: bb1 = bb - B*x */
1032       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1033       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1034 
1035       /* local sweep */
1036       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1037       CHKERRQ(ierr);
1038     }
1039   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1040     if (flag & SOR_ZERO_INITIAL_GUESS) {
1041       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1042       its--;
1043     }
1044     while (its--) {
1045       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1046       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1047 
1048       /* update rhs: bb1 = bb - B*x */
1049       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1050       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1051 
1052       /* local sweep */
1053       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1054       CHKERRQ(ierr);
1055     }
1056   } else {
1057     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1058   }
1059 
1060   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1066 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1067 {
1068   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1069   Mat        A = mat->A,B = mat->B;
1070   int        ierr;
1071   PetscReal  isend[5],irecv[5];
1072 
1073   PetscFunctionBegin;
1074   info->block_size     = 1.0;
1075   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1076   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1077   isend[3] = info->memory;  isend[4] = info->mallocs;
1078   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1079   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1080   isend[3] += info->memory;  isend[4] += info->mallocs;
1081   if (flag == MAT_LOCAL) {
1082     info->nz_used      = isend[0];
1083     info->nz_allocated = isend[1];
1084     info->nz_unneeded  = isend[2];
1085     info->memory       = isend[3];
1086     info->mallocs      = isend[4];
1087   } else if (flag == MAT_GLOBAL_MAX) {
1088     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1089     info->nz_used      = irecv[0];
1090     info->nz_allocated = irecv[1];
1091     info->nz_unneeded  = irecv[2];
1092     info->memory       = irecv[3];
1093     info->mallocs      = irecv[4];
1094   } else if (flag == MAT_GLOBAL_SUM) {
1095     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1096     info->nz_used      = irecv[0];
1097     info->nz_allocated = irecv[1];
1098     info->nz_unneeded  = irecv[2];
1099     info->memory       = irecv[3];
1100     info->mallocs      = irecv[4];
1101   }
1102   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1103   info->fill_ratio_needed = 0;
1104   info->factor_mallocs    = 0;
1105   info->rows_global       = (double)matin->M;
1106   info->columns_global    = (double)matin->N;
1107   info->rows_local        = (double)matin->m;
1108   info->columns_local     = (double)matin->n;
1109 
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatSetOption_MPIAIJ"
1115 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1116 {
1117   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1118   int        ierr;
1119 
1120   PetscFunctionBegin;
1121   switch (op) {
1122   case MAT_NO_NEW_NONZERO_LOCATIONS:
1123   case MAT_YES_NEW_NONZERO_LOCATIONS:
1124   case MAT_COLUMNS_UNSORTED:
1125   case MAT_COLUMNS_SORTED:
1126   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1127   case MAT_KEEP_ZEROED_ROWS:
1128   case MAT_NEW_NONZERO_LOCATION_ERR:
1129   case MAT_USE_INODES:
1130   case MAT_DO_NOT_USE_INODES:
1131   case MAT_IGNORE_ZERO_ENTRIES:
1132     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1133     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1134     break;
1135   case MAT_ROW_ORIENTED:
1136     a->roworiented = PETSC_TRUE;
1137     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1138     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1139     break;
1140   case MAT_ROWS_SORTED:
1141   case MAT_ROWS_UNSORTED:
1142   case MAT_YES_NEW_DIAGONALS:
1143     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1144     break;
1145   case MAT_COLUMN_ORIENTED:
1146     a->roworiented = PETSC_FALSE;
1147     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1148     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1149     break;
1150   case MAT_IGNORE_OFF_PROC_ENTRIES:
1151     a->donotstash = PETSC_TRUE;
1152     break;
1153   case MAT_NO_NEW_DIAGONALS:
1154     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1155   default:
1156     SETERRQ(PETSC_ERR_SUP,"unknown option");
1157   }
1158   PetscFunctionReturn(0);
1159 }
1160 
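/*
  MatGetRow returns a locally owned row as one array of global column indices
  and values: the off-diagonal entries with global column below cstart come
  first, then the diagonal-block entries (offset by cstart), then the remaining
  off-diagonal entries, so the merged row is in increasing global column order
  provided the two sequential blocks are themselves sorted.
*/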
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatGetRow_MPIAIJ"
1163 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1164 {
1165   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1166   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1167   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1168   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1169   int          *cmap,*idx_p;
1170 
1171   PetscFunctionBegin;
1172   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1173   mat->getrowactive = PETSC_TRUE;
1174 
1175   if (!mat->rowvalues && (idx || v)) {
1176     /*
1177         allocate enough space to hold information from the longest row.
1178     */
1179     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1180     int     max = 1,tmp;
1181     for (i=0; i<matin->m; i++) {
1182       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1183       if (max < tmp) { max = tmp; }
1184     }
1185     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1186     mat->rowindices = (int*)(mat->rowvalues + max);
1187   }
1188 
1189   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1190   lrow = row - rstart;
1191 
1192   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1193   if (!v)   {pvA = 0; pvB = 0;}
1194   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1195   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1196   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1197   nztot = nzA + nzB;
1198 
1199   cmap  = mat->garray;
1200   if (v  || idx) {
1201     if (nztot) {
1202       /* Sort by increasing column numbers, assuming A and B already sorted */
1203       int imark = -1;
1204       if (v) {
1205         *v = v_p = mat->rowvalues;
1206         for (i=0; i<nzB; i++) {
1207           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1208           else break;
1209         }
1210         imark = i;
1211         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1212         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1213       }
1214       if (idx) {
1215         *idx = idx_p = mat->rowindices;
1216         if (imark > -1) {
1217           for (i=0; i<imark; i++) {
1218             idx_p[i] = cmap[cworkB[i]];
1219           }
1220         } else {
1221           for (i=0; i<nzB; i++) {
1222             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1223             else break;
1224           }
1225           imark = i;
1226         }
1227         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1228         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1229       }
1230     } else {
1231       if (idx) *idx = 0;
1232       if (v)   *v   = 0;
1233     }
1234   }
1235   *nz = nztot;
1236   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1237   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1243 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1244 {
1245   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1246 
1247   PetscFunctionBegin;
1248   if (aij->getrowactive == PETSC_FALSE) {
1249     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1250   }
1251   aij->getrowactive = PETSC_FALSE;
1252   PetscFunctionReturn(0);
1253 }
1254 
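/*
  Norms are assembled from the two sequential blocks: the Frobenius norm sums
  the squares of all local entries and allreduces the total, the 1-norm
  accumulates per-column absolute sums into a length-N array and allreduces it,
  and the infinity norm takes the largest local row sum and allreduces with
  MPI_MAX.  The 2-norm is not supported.
*/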
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatNorm_MPIAIJ"
1257 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1258 {
1259   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1260   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1261   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1262   PetscReal    sum = 0.0;
1263   PetscScalar  *v;
1264 
1265   PetscFunctionBegin;
1266   if (aij->size == 1) {
1267     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1268   } else {
1269     if (type == NORM_FROBENIUS) {
1270       v = amat->a;
1271       for (i=0; i<amat->nz; i++) {
1272 #if defined(PETSC_USE_COMPLEX)
1273         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1274 #else
1275         sum += (*v)*(*v); v++;
1276 #endif
1277       }
1278       v = bmat->a;
1279       for (i=0; i<bmat->nz; i++) {
1280 #if defined(PETSC_USE_COMPLEX)
1281         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1282 #else
1283         sum += (*v)*(*v); v++;
1284 #endif
1285       }
1286       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1287       *norm = sqrt(*norm);
1288     } else if (type == NORM_1) { /* max column norm */
1289       PetscReal *tmp,*tmp2;
1290       int    *jj,*garray = aij->garray;
1291       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1292       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1293       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1294       *norm = 0.0;
1295       v = amat->a; jj = amat->j;
1296       for (j=0; j<amat->nz; j++) {
1297         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1298       }
1299       v = bmat->a; jj = bmat->j;
1300       for (j=0; j<bmat->nz; j++) {
1301         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1302       }
1303       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1304       for (j=0; j<mat->N; j++) {
1305         if (tmp2[j] > *norm) *norm = tmp2[j];
1306       }
1307       ierr = PetscFree(tmp);CHKERRQ(ierr);
1308       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1309     } else if (type == NORM_INFINITY) { /* max row norm */
1310       PetscReal ntemp = 0.0;
1311       for (j=0; j<aij->A->m; j++) {
1312         v = amat->a + amat->i[j] + shift;
1313         sum = 0.0;
1314         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1315           sum += PetscAbsScalar(*v); v++;
1316         }
1317         v = bmat->a + bmat->i[j] + shift;
1318         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1319           sum += PetscAbsScalar(*v); v++;
1320         }
1321         if (sum > ntemp) ntemp = sum;
1322       }
1323       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1324     } else {
1325       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1326     }
1327   }
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatTranspose_MPIAIJ"
1333 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1334 {
1335   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1336   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1337   int          ierr,shift = Aloc->indexshift;
1338   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1339   Mat          B;
1340   PetscScalar  *array;
1341 
1342   PetscFunctionBegin;
1343   if (!matout && M != N) {
1344     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1345   }
1346 
1347   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1348 
1349   /* copy over the A part */
1350   Aloc = (Mat_SeqAIJ*)a->A->data;
1351   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1352   row = a->rstart;
1353   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1354   for (i=0; i<m; i++) {
1355     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1356     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1357   }
1358   aj = Aloc->j;
1359   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1360 
1361   /* copy over the B part */
1362   Aloc = (Mat_SeqAIJ*)a->B->data;
1363   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1364   row  = a->rstart;
1365   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1366   ct   = cols;
1367   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1368   for (i=0; i<m; i++) {
1369     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1370     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1371   }
1372   ierr = PetscFree(ct);CHKERRQ(ierr);
1373   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375   if (matout) {
1376     *matout = B;
1377   } else {
1378     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1385 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1386 {
1387   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1388   Mat        a = aij->A,b = aij->B;
1389   int        ierr,s1,s2,s3;
1390 
1391   PetscFunctionBegin;
1392   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1393   if (rr) {
1394     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1395     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1396     /* Overlap communication with computation. */
1397     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1398   }
1399   if (ll) {
1400     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1401     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1402     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1403   }
1404   /* scale  the diagonal block */
1405   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1406 
1407   if (rr) {
1408     /* Do a scatter end and then right scale the off-diagonal block */
1409     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1410     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1411   }
1412 
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1419 int MatPrintHelp_MPIAIJ(Mat A)
1420 {
1421   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1422   int        ierr;
1423 
1424   PetscFunctionBegin;
1425   if (!a->rank) {
1426     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1427   }
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1433 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1434 {
1435   PetscFunctionBegin;
1436   *bs = 1;
1437   PetscFunctionReturn(0);
1438 }
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1441 int MatSetUnfactored_MPIAIJ(Mat A)
1442 {
1443   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1444   int        ierr;
1445 
1446   PetscFunctionBegin;
1447   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatEqual_MPIAIJ"
1453 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1454 {
1455   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1456   Mat        a,b,c,d;
1457   PetscTruth flg;
1458   int        ierr;
1459 
1460   PetscFunctionBegin;
1461   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1462   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1463   a = matA->A; b = matA->B;
1464   c = matB->A; d = matB->B;
1465 
1466   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1467   if (flg == PETSC_TRUE) {
1468     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1469   }
1470   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatCopy_MPIAIJ"
1476 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1477 {
1478   int        ierr;
1479   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1480   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1481   PetscTruth flg;
1482 
1483   PetscFunctionBegin;
1484   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1485   if (str != SAME_NONZERO_PATTERN || !flg) {
1486     /* because of the column compression in the off-processor part of the matrix a->B,
1487        the number of columns in a->B and b->B may be different, hence we cannot call
1488        MatCopy() directly on the two parts. If need be, we could provide a more
1489        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1490        and then copying the submatrices */
1491     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1492   } else {
1493     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1494     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1495   }
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 #undef __FUNCT__
1500 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1501 int MatSetUpPreallocation_MPIAIJ(Mat A)
1502 {
1503   int        ierr;
1504 
1505   PetscFunctionBegin;
1506   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1511 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1512 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1513 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1514 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1515 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1516 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1517 #endif
1518 
1519 #include "petscblaslapack.h"
1520 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatAXPY_MPIAIJ"
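/*
   MatAXPY_MPIAIJ - computes Y = a*X + Y. When the matrices share the same nonzero
   pattern this reduces to a pair of BLAS axpy calls on the diagonal and off-diagonal
   blocks; for a subset pattern a map from X's nonzeros into Y's is built (and cached)
   first; otherwise the generic MatAXPY_Basic() is used.
*/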
1523 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1524 {
1525   int        ierr,one=1,i;
1526   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1527   Mat_SeqAIJ *x,*y;
1528 
1529   PetscFunctionBegin;
1530   if (str == SAME_NONZERO_PATTERN) {
1531     x = (Mat_SeqAIJ *)xx->A->data;
1532     y = (Mat_SeqAIJ *)yy->A->data;
1533     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1534     x = (Mat_SeqAIJ *)xx->B->data;
1535     y = (Mat_SeqAIJ *)yy->B->data;
1536     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1537   } else if (str == SUBSET_NONZERO_PATTERN) {
1538     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1539 
1540     x = (Mat_SeqAIJ *)xx->B->data;
1541     y = (Mat_SeqAIJ *)yy->B->data;
1542     if (y->xtoy && y->XtoY != xx->B) {
1543       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1544       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1545     }
1546     if (!y->xtoy) { /* get xtoy */
1547       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1548       y->XtoY = xx->B;
1549     }
1550     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1551   } else {
1552     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /* -------------------------------------------------------------------*/
1558 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1559        MatGetRow_MPIAIJ,
1560        MatRestoreRow_MPIAIJ,
1561        MatMult_MPIAIJ,
1562        MatMultAdd_MPIAIJ,
1563        MatMultTranspose_MPIAIJ,
1564        MatMultTransposeAdd_MPIAIJ,
1565        0,
1566        0,
1567        0,
1568        0,
1569        0,
1570        0,
1571        MatRelax_MPIAIJ,
1572        MatTranspose_MPIAIJ,
1573        MatGetInfo_MPIAIJ,
1574        MatEqual_MPIAIJ,
1575        MatGetDiagonal_MPIAIJ,
1576        MatDiagonalScale_MPIAIJ,
1577        MatNorm_MPIAIJ,
1578        MatAssemblyBegin_MPIAIJ,
1579        MatAssemblyEnd_MPIAIJ,
1580        0,
1581        MatSetOption_MPIAIJ,
1582        MatZeroEntries_MPIAIJ,
1583        MatZeroRows_MPIAIJ,
1584 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1585        MatLUFactorSymbolic_MPIAIJ_TFS,
1586 #else
1587        0,
1588 #endif
1589        0,
1590        0,
1591        0,
1592        MatSetUpPreallocation_MPIAIJ,
1593        0,
1594        0,
1595        0,
1596        0,
1597        MatDuplicate_MPIAIJ,
1598        0,
1599        0,
1600        0,
1601        0,
1602        MatAXPY_MPIAIJ,
1603        MatGetSubMatrices_MPIAIJ,
1604        MatIncreaseOverlap_MPIAIJ,
1605        MatGetValues_MPIAIJ,
1606        MatCopy_MPIAIJ,
1607        MatPrintHelp_MPIAIJ,
1608        MatScale_MPIAIJ,
1609        0,
1610        0,
1611        0,
1612        MatGetBlockSize_MPIAIJ,
1613        0,
1614        0,
1615        0,
1616        0,
1617        MatFDColoringCreate_MPIAIJ,
1618        0,
1619        MatSetUnfactored_MPIAIJ,
1620        0,
1621        0,
1622        MatGetSubMatrix_MPIAIJ,
1623        MatDestroy_MPIAIJ,
1624        MatView_MPIAIJ,
1625        MatGetPetscMaps_Petsc,
1626        0,
1627        0,
1628        0,
1629        0,
1630        0,
1631        0,
1632        0,
1633        0,
1634        MatSetColoring_MPIAIJ,
1635        MatSetValuesAdic_MPIAIJ,
1636        MatSetValuesAdifor_MPIAIJ
1637 };
1638 
1639 /* ----------------------------------------------------------------------------------------*/
1640 
1641 EXTERN_C_BEGIN
1642 #undef __FUNCT__
1643 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1644 int MatStoreValues_MPIAIJ(Mat mat)
1645 {
1646   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1647   int        ierr;
1648 
1649   PetscFunctionBegin;
1650   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1651   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1652   PetscFunctionReturn(0);
1653 }
1654 EXTERN_C_END
1655 
1656 EXTERN_C_BEGIN
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1659 int MatRetrieveValues_MPIAIJ(Mat mat)
1660 {
1661   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1662   int        ierr;
1663 
1664   PetscFunctionBegin;
1665   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1666   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1667   PetscFunctionReturn(0);
1668 }
1669 EXTERN_C_END
1670 
1671 #include "petscpc.h"
1672 EXTERN_C_BEGIN
1673 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1674 EXTERN_C_END
1675 
1676 EXTERN_C_BEGIN
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatCreate_MPIAIJ"
1679 int MatCreate_MPIAIJ(Mat B)
1680 {
1681   Mat_MPIAIJ *b;
1682   int        ierr,i,size;
1683 
1684   PetscFunctionBegin;
1685   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1686 
1687   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1688   B->data         = (void*)b;
1689   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1690   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1691   B->factor       = 0;
1692   B->assembled    = PETSC_FALSE;
1693   B->mapping      = 0;
1694 
1695   B->insertmode      = NOT_SET_VALUES;
1696   b->size            = size;
1697   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1698 
1699   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1700   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1701 
1702   /* the information in the maps duplicates the information computed below, eventually
1703      we should remove the duplicate information that is not contained in the maps */
1704   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1705   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1706 
1707   /* build local table of row and column ownerships */
1708   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1709   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1710   b->cowners = b->rowners + b->size + 2;
1711   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1712   b->rowners[0] = 0;
1713   for (i=2; i<=b->size; i++) {
1714     b->rowners[i] += b->rowners[i-1];
1715   }
1716   b->rstart = b->rowners[b->rank];
1717   b->rend   = b->rowners[b->rank+1];
1718   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1719   b->cowners[0] = 0;
1720   for (i=2; i<=b->size; i++) {
1721     b->cowners[i] += b->cowners[i-1];
1722   }
1723   b->cstart = b->cowners[b->rank];
1724   b->cend   = b->cowners[b->rank+1];
1725 
1726   /* build cache (stash) for off-processor entries formed during assembly */
1727   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1728   b->donotstash  = PETSC_FALSE;
1729   b->colmap      = 0;
1730   b->garray      = 0;
1731   b->roworiented = PETSC_TRUE;
1732 
1733   /* stuff used for matrix vector multiply */
1734   b->lvec      = PETSC_NULL;
1735   b->Mvctx     = PETSC_NULL;
1736 
1737   /* stuff for MatGetRow() */
1738   b->rowindices   = 0;
1739   b->rowvalues    = 0;
1740   b->getrowactive = PETSC_FALSE;
1741 
1742   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1743                                      "MatStoreValues_MPIAIJ",
1744                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1745   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1746                                      "MatRetrieveValues_MPIAIJ",
1747                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1748   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1749                                      "MatGetDiagonalBlock_MPIAIJ",
1750                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1751 
1752   PetscFunctionReturn(0);
1753 }
1754 EXTERN_C_END
1755 
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1758 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1759 {
1760   Mat        mat;
1761   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1762   int        ierr;
1763 
1764   PetscFunctionBegin;
1765   *newmat       = 0;
1766   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1767   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1768   a    = (Mat_MPIAIJ*)mat->data;
1769   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1770   mat->factor       = matin->factor;
1771   mat->assembled    = PETSC_TRUE;
1772   mat->insertmode   = NOT_SET_VALUES;
1773   mat->preallocated = PETSC_TRUE;
1774 
1775   a->rstart       = oldmat->rstart;
1776   a->rend         = oldmat->rend;
1777   a->cstart       = oldmat->cstart;
1778   a->cend         = oldmat->cend;
1779   a->size         = oldmat->size;
1780   a->rank         = oldmat->rank;
1781   a->donotstash   = oldmat->donotstash;
1782   a->roworiented  = oldmat->roworiented;
1783   a->rowindices   = 0;
1784   a->rowvalues    = 0;
1785   a->getrowactive = PETSC_FALSE;
1786 
1787   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1788   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1789   if (oldmat->colmap) {
1790 #if defined (PETSC_USE_CTABLE)
1791     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1792 #else
1793     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1794     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1795     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1796 #endif
1797   } else a->colmap = 0;
1798   if (oldmat->garray) {
1799     int len;
1800     len  = oldmat->B->n;
1801     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1802     PetscLogObjectMemory(mat,len*sizeof(int));
1803     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1804   } else a->garray = 0;
1805 
1806   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1807   PetscLogObjectParent(mat,a->lvec);
1808   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1809   PetscLogObjectParent(mat,a->Mvctx);
1810   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1811   PetscLogObjectParent(mat,a->A);
1812   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1813   PetscLogObjectParent(mat,a->B);
1814   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1815   *newmat = mat;
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 #include "petscsys.h"
1820 
1821 EXTERN_C_BEGIN
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatLoad_MPIAIJ"
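/*
   MatLoad_MPIAIJ - loads a matrix stored in PETSc binary format: process 0 reads the
   header, the row lengths, and then the column indices and numerical values, shipping
   each process the data for its rows; each process then inserts its rows and the
   parallel matrix is assembled.
*/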
1824 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1825 {
1826   Mat          A;
1827   PetscScalar  *vals,*svals;
1828   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1829   MPI_Status   status;
1830   int          i,nz,ierr,j,rstart,rend,fd;
1831   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1832   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1833   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1834 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST)
1835   PetscTruth   flag;
1836 #endif
1837 
1838   PetscFunctionBegin;
1839   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1840   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1841   if (!rank) {
1842     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1843     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1844     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1845     if (header[3] < 0) {
1846       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1847     }
1848   }
1849 
1850   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1851   M = header[1]; N = header[2];
1852   /* determine ownership of all rows */
1853   m = M/size + ((M % size) > rank);
1854   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1855   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1856   rowners[0] = 0;
1857   for (i=2; i<=size; i++) {
1858     rowners[i] += rowners[i-1];
1859   }
1860   rstart = rowners[rank];
1861   rend   = rowners[rank+1];
1862 
1863   /* distribute row lengths to all processors */
1864   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1865   offlens = ourlens + (rend-rstart);
1866   if (!rank) {
1867     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1868     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1869     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1870     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1871     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1872     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1873   } else {
1874     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1875   }
1876 
1877   if (!rank) {
1878     /* calculate the number of nonzeros on each processor */
1879     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1880     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1881     for (i=0; i<size; i++) {
1882       for (j=rowners[i]; j< rowners[i+1]; j++) {
1883         procsnz[i] += rowlengths[j];
1884       }
1885     }
1886     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1887 
1888     /* determine max buffer needed and allocate it */
1889     maxnz = 0;
1890     for (i=0; i<size; i++) {
1891       maxnz = PetscMax(maxnz,procsnz[i]);
1892     }
1893     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1894 
1895     /* read in my part of the matrix column indices  */
1896     nz   = procsnz[0];
1897     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1898     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1899 
1900     /* read in everyone else's part and ship it off */
1901     for (i=1; i<size; i++) {
1902       nz   = procsnz[i];
1903       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1904       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1905     }
1906     ierr = PetscFree(cols);CHKERRQ(ierr);
1907   } else {
1908     /* determine buffer space needed for message */
1909     nz = 0;
1910     for (i=0; i<m; i++) {
1911       nz += ourlens[i];
1912     }
1913     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1914 
1915     /* receive message of column indices */
1916     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1917     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1918     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1919   }
1920 
1921   /* determine column ownership if matrix is not square */
1922   if (N != M) {
1923     n      = N/size + ((N % size) > rank);
1924     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1925     cstart = cend - n;
1926   } else {
1927     cstart = rstart;
1928     cend   = rend;
1929     n      = cend - cstart;
1930   }
1931 
1932   /* loop over local rows, determining number of off diagonal entries */
1933   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1934   jj = 0;
1935   for (i=0; i<m; i++) {
1936     for (j=0; j<ourlens[i]; j++) {
1937       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1938       jj++;
1939     }
1940   }
1941 
1942   /* create our matrix */
1943   for (i=0; i<m; i++) {
1944     ourlens[i] -= offlens[i];
1945   }
1946   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1947   A = *newmat;
1948   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1949   for (i=0; i<m; i++) {
1950     ourlens[i] += offlens[i];
1951   }
1952 
1953   if (!rank) {
1954     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1955 
1956     /* read in my part of the matrix numerical values  */
1957     nz   = procsnz[0];
1958     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1959 
1960     /* insert into matrix */
1961     jj      = rstart;
1962     smycols = mycols;
1963     svals   = vals;
1964     for (i=0; i<m; i++) {
1965       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1966       smycols += ourlens[i];
1967       svals   += ourlens[i];
1968       jj++;
1969     }
1970 
1971     /* read in other processors and ship out */
1972     for (i=1; i<size; i++) {
1973       nz   = procsnz[i];
1974       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1975       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1976     }
1977     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1978   } else {
1979     /* receive numeric values */
1980     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1981 
1982     /* receive message of values */
1983     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1984     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1985     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1986 
1987     /* insert into matrix */
1988     jj      = rstart;
1989     smycols = mycols;
1990     svals   = vals;
1991     for (i=0; i<m; i++) {
1992       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1993       smycols += ourlens[i];
1994       svals   += ourlens[i];
1995       jj++;
1996     }
1997   }
1998   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1999   ierr = PetscFree(vals);CHKERRQ(ierr);
2000   ierr = PetscFree(mycols);CHKERRQ(ierr);
2001   ierr = PetscFree(rowners);CHKERRQ(ierr);
2002 
2003   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2004   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2005 #if defined(PETSC_HAVE_SPOOLES)
2006   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2007   if (flag) {
2008     if (size == 1) {
2009       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2010     } else {
2011       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2012     }
2013   }
2014 #endif
2015 #if defined(PETSC_HAVE_SUPERLUDIST)
2016   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2017   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2018 #endif
2019   PetscFunctionReturn(0);
2020 }
2021 EXTERN_C_END
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2025 /*
2026     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix on
2027   each process, and then the final result formed by concatenating the local matrices.
2028   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2029 */
2030 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2031 {
2032   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2033   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2034   Mat          *local,M,Mreuse;
2035   PetscScalar  *vwork,*aa;
2036   MPI_Comm     comm = mat->comm;
2037   Mat_SeqAIJ   *aij;
2038 
2039 
2040   PetscFunctionBegin;
2041   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2043 
2044   if (call ==  MAT_REUSE_MATRIX) {
2045     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2046     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2047     local = &Mreuse;
2048     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2049   } else {
2050     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2051     Mreuse = *local;
2052     ierr   = PetscFree(local);CHKERRQ(ierr);
2053   }
2054 
2055   /*
2056       m - number of local rows
2057       n - number of columns (same on all processors)
2058       rstart - first row in new global matrix generated
2059   */
2060   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2061   if (call == MAT_INITIAL_MATRIX) {
2062     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2063     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2064     ii  = aij->i;
2065     jj  = aij->j;
2066 
2067     /*
2068         Determine the number of non-zeros in the diagonal and off-diagonal
2069         portions of the matrix in order to do correct preallocation
2070     */
2071 
2072     /* first get start and end of "diagonal" columns */
2073     if (csize == PETSC_DECIDE) {
2074       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2075       if (mglobal == n) { /* square matrix */
2076         nlocal = m;
2077       } else {
2078         nlocal = n/size + ((n % size) > rank);
2079       }
2080     } else {
2081       nlocal = csize;
2082     }
2083     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2084     rstart = rend - nlocal;
2085     if (rank == size - 1 && rend != n) {
2086       SETERRQ(1,"Local column sizes do not add up to total number of columns");
2087     }
2088 
2089     /* next, compute all the lengths */
2090     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2091     olens = dlens + m;
2092     for (i=0; i<m; i++) {
2093       jend = ii[i+1] - ii[i];
2094       olen = 0;
2095       dlen = 0;
2096       for (j=0; j<jend; j++) {
2097         if (*jj < rstart || *jj >= rend) olen++;
2098         else dlen++;
2099         jj++;
2100       }
2101       olens[i] = olen;
2102       dlens[i] = dlen;
2103     }
2104     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2105     ierr = PetscFree(dlens);CHKERRQ(ierr);
2106   } else {
2107     int ml,nl;
2108 
2109     M = *newmat;
2110     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2111     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2112     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2113     /*
2114          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2115        rather than the slower MatSetValues().
2116     */
2117     M->was_assembled = PETSC_TRUE;
2118     M->assembled     = PETSC_FALSE;
2119   }
2120   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2121   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2122   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2123   ii  = aij->i;
2124   jj  = aij->j;
2125   aa  = aij->a;
2126   for (i=0; i<m; i++) {
2127     row   = rstart + i;
2128     nz    = ii[i+1] - ii[i];
2129     cwork = jj;     jj += nz;
2130     vwork = aa;     aa += nz;
2131     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2132   }
2133 
2134   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   *newmat = M;
2137 
2138   /* save submatrix used in processor for next request */
2139   if (call ==  MAT_INITIAL_MATRIX) {
2140     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2141     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2142   }
2143 
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2149 /*@C
2150    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2151    (the default parallel PETSc format).  For good matrix assembly performance
2152    the user should preallocate the matrix storage by setting the parameters
2153    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2154    performance can be increased by more than a factor of 50.
2155 
2156    Collective on MPI_Comm
2157 
2158    Input Parameters:
2159 +  A - the matrix
2160 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2161            (same value is used for all local rows)
2162 .  d_nnz - array containing the number of nonzeros in the various rows of the
2163            DIAGONAL portion of the local submatrix (possibly different for each row)
2164            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2165            The size of this array is equal to the number of local rows, i.e., 'm'.
2166            You must leave room for the diagonal entry even if it is zero.
2167 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2168            submatrix (same value is used for all local rows).
2169 -  o_nnz - array containing the number of nonzeros in the various rows of the
2170            OFF-DIAGONAL portion of the local submatrix (possibly different for
2171            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2172            structure. The size of this array is equal to the number
2173            of local rows, i.e., 'm'.
2174 
2175    The AIJ format (also called the Yale sparse matrix format or
2176    compressed row storage) is fully compatible with standard Fortran 77
2177    storage.  That is, the stored row and column indices can begin at
2178    either one (as in Fortran) or zero.  See the users manual for details.
2179 
2180    The user MUST specify either the local or global matrix dimensions
2181    (possibly both).
2182 
2183    The parallel matrix is partitioned such that the first m0 rows belong to
2184    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2185    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2186 
2187    The DIAGONAL portion of the local submatrix of a processor can be defined
2188    as the submatrix obtained by extracting the part corresponding
2189    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2190    first row that belongs to the processor, and r2 is the last row belonging
2191    to this processor. This is a square mxm matrix. The remaining portion
2192    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2193 
2194    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2195 
2196    By default, this format uses inodes (identical nodes) when possible.
2197    We search for consecutive rows with the same nonzero structure, thereby
2198    reusing matrix information to achieve increased efficiency.
2199 
2200    Options Database Keys:
2201 +  -mat_aij_no_inode  - Do not use inodes
2202 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2203 -  -mat_aij_oneindex - Internally use indexing starting at 1
2204         rather than 0.  Note that when calling MatSetValues(),
2205         the user still MUST index entries starting at 0!
2206 
2207    Example usage:
2208 
2209    Consider the following 8x8 matrix with 34 non-zero values, that is
2210    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2211    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2212    as follows:
2213 
2214 .vb
2215             1  2  0  |  0  3  0  |  0  4
2216     Proc0   0  5  6  |  7  0  0  |  8  0
2217             9  0 10  | 11  0  0  | 12  0
2218     -------------------------------------
2219            13  0 14  | 15 16 17  |  0  0
2220     Proc1   0 18  0  | 19 20 21  |  0  0
2221             0  0  0  | 22 23  0  | 24  0
2222     -------------------------------------
2223     Proc2  25 26 27  |  0  0 28  | 29  0
2224            30  0  0  | 31 32 33  |  0 34
2225 .ve
2226 
2227    This can be represented as a collection of submatrices as:
2228 
2229 .vb
2230       A B C
2231       D E F
2232       G H I
2233 .ve
2234 
2235    Where the submatrices A,B,C are owned by proc0, D,E,F are
2236    owned by proc1, G,H,I are owned by proc2.
2237 
2238    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2239    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2240    The 'M','N' parameters are 8,8, and have the same values on all procs.
2241 
2242    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2243    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2244    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2245    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2246    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2247    matrix, and [DF] as another SeqAIJ matrix.
2248 
2249    When d_nz, o_nz parameters are specified, d_nz storage elements are
2250    allocated for every row of the local diagonal submatrix, and o_nz
2251    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2252    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2253    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2254    In this case, the values of d_nz,o_nz are:
2255 .vb
2256      proc0 : d_nz = 2, o_nz = 2
2257      proc1 : d_nz = 3, o_nz = 2
2258      proc2 : d_nz = 1, o_nz = 4
2259 .ve
2260    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2261    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2262    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2263    34 values.
2264 
2265    When d_nnz, o_nnz parameters are specified, the storage is specified
2266    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2267    In the above case the values for d_nnz,o_nnz are:
2268 .vb
2269      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2270      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2271      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2272 .ve
2273    Here the space allocated is the sum of all the above values, i.e., 34, and
2274    hence the preallocation is perfect.
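
   A minimal call sequence for the 8x8 example above might look like the following
   sketch, where m,n are set to this process's local sizes (3,3 on proc0 and proc1,
   2,2 on proc2) and the d_nnz/o_nnz arrays shown are the proc0 values:

.vb
     Mat A;
     int m,n,ierr;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreate(PETSC_COMM_WORLD,m,n,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   followed by the MatSetValues() calls and MatAssemblyBegin()/MatAssemblyEnd().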
2275 
2276    Level: intermediate
2277 
2278 .keywords: matrix, aij, compressed row, sparse, parallel
2279 
2280 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2281 @*/
2282 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2283 {
2284   Mat_MPIAIJ   *b;
2285   int          ierr,i;
2286   PetscTruth   flg2;
2287 
2288   PetscFunctionBegin;
2289   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2290   if (!flg2) PetscFunctionReturn(0);
2291   B->preallocated = PETSC_TRUE;
2292   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2293   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2294   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2295   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2296   if (d_nnz) {
2297     for (i=0; i<B->m; i++) {
2298       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2299     }
2300   }
2301   if (o_nnz) {
2302     for (i=0; i<B->m; i++) {
2303       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2304     }
2305   }
2306   b = (Mat_MPIAIJ*)B->data;
2307 
2308   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2309   PetscLogObjectParent(B,b->A);
2310   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2311   PetscLogObjectParent(B,b->B);
2312 
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 #undef __FUNCT__
2317 #define __FUNCT__ "MatCreateMPIAIJ"
2318 /*@C
2319    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2320    (the default parallel PETSc format).  For good matrix assembly performance
2321    the user should preallocate the matrix storage by setting the parameters
2322    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2323    performance can be increased by more than a factor of 50.
2324 
2325    Collective on MPI_Comm
2326 
2327    Input Parameters:
2328 +  comm - MPI communicator
2329 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2330            This value should be the same as the local size used in creating the
2331            y vector for the matrix-vector product y = Ax.
2332 .  n - This value should be the same as the local size used in creating the
2333        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2334        calculated if N is given) For square matrices n is almost always m.
2335 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2336 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2337 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2338            (same value is used for all local rows)
2339 .  d_nnz - array containing the number of nonzeros in the various rows of the
2340            DIAGONAL portion of the local submatrix (possibly different for each row)
2341            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2342            The size of this array is equal to the number of local rows, i.e., 'm'.
2343            You must leave room for the diagonal entry even if it is zero.
2344 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2345            submatrix (same value is used for all local rows).
2346 -  o_nnz - array containing the number of nonzeros in the various rows of the
2347            OFF-DIAGONAL portion of the local submatrix (possibly different for
2348            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2349            structure. The size of this array is equal to the number
2350            of local rows, i.e., 'm'.
2351 
2352    Output Parameter:
2353 .  A - the matrix
2354 
2355    Notes:
2356    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2357    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2358    storage requirements for this matrix.
2359 
2360    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2361    processor then it must be used on all processors that share the object for
2362    that argument.
2363 
2364    The AIJ format (also called the Yale sparse matrix format or
2365    compressed row storage) is fully compatible with standard Fortran 77
2366    storage.  That is, the stored row and column indices can begin at
2367    either one (as in Fortran) or zero.  See the users manual for details.
2368 
2369    The user MUST specify either the local or global matrix dimensions
2370    (possibly both).
2371 
2372    The parallel matrix is partitioned such that the first m0 rows belong to
2373    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2374    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2375 
2376    The DIAGONAL portion of the local submatrix of a processor can be defined
2377    as the submatrix obtained by extracting the part corresponding
2378    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2379    first row that belongs to the processor, and r2 is the last row belonging
2380    to this processor. This is a square mxm matrix. The remaining portion
2381    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2382 
2383    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2384 
2385    By default, this format uses inodes (identical nodes) when possible.
2386    We search for consecutive rows with the same nonzero structure, thereby
2387    reusing matrix information to achieve increased efficiency.
2388 
2389    Options Database Keys:
2390 +  -mat_aij_no_inode  - Do not use inodes
2391 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2392 -  -mat_aij_oneindex - Internally use indexing starting at 1
2393         rather than 0.  Note that when calling MatSetValues(),
2394         the user still MUST index entries starting at 0!
2395 
2396 
2397    Example usage:
2398 
2399    Consider the following 8x8 matrix with 34 non-zero values, that is
2400    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2401    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2402    as follows:
2403 
2404 .vb
2405             1  2  0  |  0  3  0  |  0  4
2406     Proc0   0  5  6  |  7  0  0  |  8  0
2407             9  0 10  | 11  0  0  | 12  0
2408     -------------------------------------
2409            13  0 14  | 15 16 17  |  0  0
2410     Proc1   0 18  0  | 19 20 21  |  0  0
2411             0  0  0  | 22 23  0  | 24  0
2412     -------------------------------------
2413     Proc2  25 26 27  |  0  0 28  | 29  0
2414            30  0  0  | 31 32 33  |  0 34
2415 .ve
2416 
2417    This can be represented as a collection of submatrices as:
2418 
2419 .vb
2420       A B C
2421       D E F
2422       G H I
2423 .ve
2424 
2425    Where the submatrices A,B,C are owned by proc0, D,E,F are
2426    owned by proc1, G,H,I are owned by proc2.
2427 
2428    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2429    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2430    The 'M','N' parameters are 8,8, and have the same values on all procs.
2431 
2432    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2433    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2434    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2435    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2436    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2437    matrix, and [DF] as another SeqAIJ matrix.
2438 
2439    When d_nz, o_nz parameters are specified, d_nz storage elements are
2440    allocated for every row of the local diagonal submatrix, and o_nz
2441    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2442    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2443    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2444    In this case, the values of d_nz,o_nz are:
2445 .vb
2446      proc0 : d_nz = 2, o_nz = 2
2447      proc1 : d_nz = 3, o_nz = 2
2448      proc2 : d_nz = 1, o_nz = 4
2449 .ve
2450    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2451    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2452    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2453    34 values.
2454 
2455    When d_nnz, o_nnz parameters are specified, the storage is specified
2456    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2457    In the above case the values for d_nnz,o_nnz are:
2458 .vb
2459      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2460      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2461      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2462 .ve
2463    Here the space allocated is the sum of all the above values, i.e., 34, and
2464    hence the preallocation is perfect.
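
   A minimal creation call for the 8x8 example above might look like the following
   sketch, where m,n are this process's local sizes (3,3 on proc0 and proc1, 2,2 on
   proc2) and the d_nnz/o_nnz arrays shown are the proc0 values:

.vb
     Mat A;
     int m,n,ierr;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   followed by the MatSetValues() calls and MatAssemblyBegin()/MatAssemblyEnd().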
2465 
2466    Level: intermediate
2467 
2468 .keywords: matrix, aij, compressed row, sparse, parallel
2469 
2470 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2471 @*/
2472 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2473 {
2474   int ierr,size;
2475 
2476   PetscFunctionBegin;
2477   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2478   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2479   if (size > 1) {
2480     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2481     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2482   } else {
2483     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2484     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2485   }
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 #undef __FUNCT__
2490 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
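/*
   MatMPIAIJGetSeqAIJ - returns the two SeqAIJ matrices that store the local part of
   an MPIAIJ matrix: Ad is the diagonal block, Ao the off-diagonal block, and colmap
   (the garray) maps the compressed local column numbers of Ao to global column numbers.
*/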
2491 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2492 {
2493   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2494   PetscFunctionBegin;
2495   *Ad     = a->A;
2496   *Ao     = a->B;
2497   *colmap = a->garray;
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2503 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2504 {
2505   int        ierr,i;
2506   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2507 
2508   PetscFunctionBegin;
2509   if (coloring->ctype == IS_COLORING_LOCAL) {
2510     ISColoringValue *allcolors,*colors;
2511     ISColoring      ocoloring;
2512 
2513     /* set coloring for diagonal portion */
2514     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2515 
2516     /* set coloring for off-diagonal portion */
2517     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2518     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2519     for (i=0; i<a->B->n; i++) {
2520       colors[i] = allcolors[a->garray[i]];
2521     }
2522     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2523     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2524     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2525     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2526   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2527     ISColoringValue *colors;
2528     int             *larray;
2529     ISColoring      ocoloring;
2530 
2531     /* set coloring for diagonal portion */
2532     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2533     for (i=0; i<a->A->n; i++) {
2534       larray[i] = i + a->cstart;
2535     }
2536     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2537     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2538     for (i=0; i<a->A->n; i++) {
2539       colors[i] = coloring->colors[larray[i]];
2540     }
2541     ierr = PetscFree(larray);CHKERRQ(ierr);
2542     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2543     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2544     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2545 
2546     /* set coloring for off-diagonal portion */
2547     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2548     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2549     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2550     for (i=0; i<a->B->n; i++) {
2551       colors[i] = coloring->colors[larray[i]];
2552     }
2553     ierr = PetscFree(larray);CHKERRQ(ierr);
2554     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2555     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2556     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2557   } else {
2558     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2559   }
2560 
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2566 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2567 {
2568   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2569   int        ierr;
2570 
2571   PetscFunctionBegin;
2572   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2573   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 #undef __FUNCT__
2578 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2579 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2580 {
2581   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2582   int        ierr;
2583 
2584   PetscFunctionBegin;
2585   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2586   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatMerge"
2592 /*@C
2593       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2594                  matrices from each processor
2595 
2596     Collective on MPI_Comm
2597 
2598    Input Parameters:
2599 +    comm - the communicator the parallel matrix will live on
2600 -    inmat - the input sequential matrices
2601 
2602    Output Parameter:
2603 .    outmat - the parallel matrix generated
2604 
2605     Level: advanced
2606 
2607    Notes: The number of columns of the input matrix on EACH of the separate
2608       processes MUST be the same.
2609 
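   A typical use might look like the following sketch, where Alocal is a hypothetical
   SeqAIJ matrix holding this processor's rows (note that MatMerge() destroys the
   input matrix):

.vb
     Mat Alocal,Aglobal;
     ierr = MatMerge(PETSC_COMM_WORLD,Alocal,&Aglobal);CHKERRQ(ierr);
.ve
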
2610 @*/
2611 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2612 {
2613   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2614   PetscScalar *values;
2615   PetscMap    columnmap,rowmap;
2616 
2617   PetscFunctionBegin;
2618 
2619   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2620 
2621   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2622   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2623   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2624   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2625   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2626   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2627 
2628   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2629   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2630   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2631   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2632   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2633 
2634   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2635   for (i=0;i<m;i++) {
2636     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2637     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2638     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2639   }
2640   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2641   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2642 
2643   for (i=0;i<m;i++) {
2644     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2645     I    = i + rstart;
2646     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2647     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2648   }
2649   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2650   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2652 
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 #undef __FUNCT__
2657 #define __FUNCT__ "MatFileSplit"
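/*
   MatFileSplit - copies the local rows of A on each process into a sequential matrix
   and writes that matrix in PETSc binary format to the file <outfile>.<rank>.
*/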
2658 int MatFileSplit(Mat A,char *outfile)
2659 {
2660   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2661   PetscViewer out;
2662   char        *name;
2663   Mat         B;
2664   PetscScalar *values;
2665 
2666   PetscFunctionBegin;
2667 
2668   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2669   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2670   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2671   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2672   for (i=0;i<m;i++) {
2673     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2674     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2675     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2676   }
2677   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2678   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2679 
2680   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2681   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2682   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2683   sprintf(name,"%s.%d",outfile,rank);
2684   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2685   ierr = PetscFree(name);CHKERRQ(ierr);
2686   ierr = MatView(B,out);CHKERRQ(ierr);
2687   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2688   ierr = MatDestroy(B);CHKERRQ(ierr);
2689   PetscFunctionReturn(0);
2690 }
2691