1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash-table lookup cost; without it it is not scalable (each
19 processor has an order-N integer array) but access is fast.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
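
/*
   Illustrative sketch (added for exposition; not part of the original source): how the
   colmap built above is consulted.  It mirrors the lookups in MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ() below; the helper name and the variables gcol/lcol are
   invented for the example.  A result of -1 means the global column is not present in
   the off-diagonal block B.
*/
#if 0
static int ColmapLookup_Sketch(Mat mat,int gcol,int *lcol)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr); /* table stores local index + 1 */
  (*lcol)--;
#else
  *lcol = aij->colmap[gcol] - 1;                                /* array stores local index + 1 */
#endif
  PetscFunctionReturn(0);
}
#endif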
42 
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the macros above */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift=0;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
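
/*
   Illustrative caller-side sketch (added for exposition; not part of the original source):
   assembling a 1D Laplacian into an MPIAIJ matrix.  Each process inserts the rows it owns;
   MatSetValues_MPIAIJ() above routes each entry into the diagonal block A, the off-diagonal
   block B, or the stash (for off-process rows), and the stash is communicated during
   MatAssemblyBegin/End.  The sizes, preallocation and values are made up for the example.
*/
#if 0
static int AssembleSample_Sketch(Mat *newmat)
{
  Mat         M;
  int         ierr,row,ncols,rstart,rend,cols[3];
  PetscScalar vals[3];

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,3,PETSC_NULL,1,PETSC_NULL,&M);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ncols = 0;
    if (row > 0)  {cols[ncols] = row-1; vals[ncols++] = -1.0;}
    cols[ncols] = row; vals[ncols++] = 2.0;
    if (row < 99) {cols[ncols] = row+1; vals[ncols++] = -1.0;}
    ierr = MatSetValues(M,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;
  PetscFunctionReturn(0);
}
#endif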
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
289     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
294         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
353   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 
359   PetscFunctionBegin;
360   if (!aij->donotstash) {
361     while (1) {
362       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
363       if (!flg) break;
364 
365       for (i=0; i<n;) {
366         /* Now identify the consecutive vals belonging to the same row */
367         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
368         if (j < n) ncols = j-i;
369         else       ncols = n-i;
370         /* Now assemble all these values with a single function call */
371         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
372         i = j;
373       }
374     }
375     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
376   }
377 
378   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
379   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
380 
381   /* determine if any processor has disassembled; if so we must
382      also disassemble ourselves, so that we may reassemble. */
383   /*
384      if nonzero structure of submatrix B cannot change then we know that
385      no processor disassembled thus we can skip this stuff
386   */
387   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
388     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
389     if (mat->was_assembled && !other_disassembled) {
390       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
391     }
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
399 
400   if (aij->rowvalues) {
401     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
402     aij->rowvalues = 0;
403   }
404 
405   /* used by MatAXPY() */
406   a->xtoy = 0; b->xtoy = 0;
407   a->XtoY = 0; b->XtoY = 0;
408 
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
414 int MatZeroEntries_MPIAIJ(Mat A)
415 {
416   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
417   int        ierr;
418 
419   PetscFunctionBegin;
420   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
421   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatZeroRows_MPIAIJ"
427 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
428 {
429   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
430   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
431   int            *nprocs,j,idx,nsends,row;
432   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
433   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
434   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
435   MPI_Comm       comm = A->comm;
436   MPI_Request    *send_waits,*recv_waits;
437   MPI_Status     recv_status,*send_status;
438   IS             istmp;
439   PetscTruth     found;
440 
441   PetscFunctionBegin;
442   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
443   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
444 
445   /*  first count number of contributors to each processor */
446   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
447   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
448   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
449   for (i=0; i<N; i++) {
450     idx = rows[i];
451     found = PETSC_FALSE;
452     for (j=0; j<size; j++) {
453       if (idx >= owners[j] && idx < owners[j+1]) {
454         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
455       }
456     }
457     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
458   }
459   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
460 
461   /* inform other processors of number of messages and max length*/
462   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
463 
464   /* post receives:   */
465   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
466   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
467   for (i=0; i<nrecvs; i++) {
468     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
469   }
470 
471   /* do sends:
472       1) starts[i] gives the starting index in svalues for stuff going to
473          the ith processor
474   */
475   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
476   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
477   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
478   starts[0] = 0;
479   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
480   for (i=0; i<N; i++) {
481     svalues[starts[owner[i]]++] = rows[i];
482   }
483   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
484 
485   starts[0] = 0;
486   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
487   count = 0;
488   for (i=0; i<size; i++) {
489     if (nprocs[2*i+1]) {
490       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
491     }
492   }
493   ierr = PetscFree(starts);CHKERRQ(ierr);
494 
495   base = owners[rank];
496 
497   /*  wait on receives */
498   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
499   source = lens + nrecvs;
500   count  = nrecvs; slen = 0;
501   while (count) {
502     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
503     /* unpack receives into our local space */
504     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
505     source[imdex]  = recv_status.MPI_SOURCE;
506     lens[imdex]    = n;
507     slen          += n;
508     count--;
509   }
510   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
511 
512   /* move the data into the send scatter */
513   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
514   count = 0;
515   for (i=0; i<nrecvs; i++) {
516     values = rvalues + i*nmax;
517     for (j=0; j<lens[i]; j++) {
518       lrows[count++] = values[j] - base;
519     }
520   }
521   ierr = PetscFree(rvalues);CHKERRQ(ierr);
522   ierr = PetscFree(lens);CHKERRQ(ierr);
523   ierr = PetscFree(owner);CHKERRQ(ierr);
524   ierr = PetscFree(nprocs);CHKERRQ(ierr);
525 
526   /* actually zap the local rows */
527   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
528   PetscLogObjectParent(A,istmp);
529 
530   /*
531         Zero the required rows. If the "diagonal block" of the matrix
532      is square and the user wishes to set the diagonal we use separate
533      code so that MatSetValues() is not called for each diagonal entry,
534      which would allocate new memory, causing many mallocs and slowing things down.
535 
536        Contributed by: Mathew Knepley
537   */
538   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
539   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
540   if (diag && (l->A->M == l->A->N)) {
541     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
542   } else if (diag) {
543     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
544     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
545       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
546 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
547     }
548     for (i = 0; i < slen; i++) {
549       row  = lrows[i] + rstart;
550       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
551     }
552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554   } else {
555     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
556   }
557   ierr = ISDestroy(istmp);CHKERRQ(ierr);
558   ierr = PetscFree(lrows);CHKERRQ(ierr);
559 
560   /* wait on sends */
561   if (nsends) {
562     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
563     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
564     ierr = PetscFree(send_status);CHKERRQ(ierr);
565   }
566   ierr = PetscFree(send_waits);CHKERRQ(ierr);
567   ierr = PetscFree(svalues);CHKERRQ(ierr);
568 
569   PetscFunctionReturn(0);
570 }
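
/*
   Illustrative sketch (added for exposition; not part of the original source): zeroing
   two locally owned rows of an assembled matrix and placing 1.0 on their diagonals.
   Passing a PETSC_NULL diagonal instead leaves the zeroed rows entirely zero.  The
   choice of rows is hypothetical.
*/
#if 0
static int ZeroTwoRows_Sketch(Mat A)
{
  IS          is;
  int         ierr,rstart,rend,rows[2];
  PetscScalar one = 1.0;

  PetscFunctionBegin;
  ierr    = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  rows[0] = rstart; rows[1] = rstart+1;   /* global indices of two rows owned by this process */
  ierr    = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
  ierr    = MatZeroRows(A,is,&one);CHKERRQ(ierr);
  ierr    = ISDestroy(is);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif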
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatMult_MPIAIJ"
574 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
575 {
576   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
577   int        ierr,nt;
578 
579   PetscFunctionBegin;
580   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
581   if (nt != A->n) {
582     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
583   }
584   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
586   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
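
/*
   Note added for exposition (not part of the original source).  The multiply above uses
   the block decomposition employed throughout this file:

       yy = A*xx_local + B*xx_ghost

   where a->A holds the locally owned ("diagonal") columns, a->B holds the remaining
   ("off-diagonal") columns, and xx_ghost (a->lvec) is gathered from the other processes
   by the scatter a->Mvctx.  Starting the scatter before the local multiply and finishing
   it afterwards overlaps communication with the local computation.
*/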
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatMultAdd_MPIAIJ"
593 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
594 {
595   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
596   int        ierr;
597 
598   PetscFunctionBegin;
599   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
601   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
608 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
609 {
610   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
611   int        ierr;
612 
613   PetscFunctionBegin;
614   /* do nondiagonal part */
615   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
616   /* send it on its way */
617   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
618   /* do local part */
619   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
620   /* receive remote parts: note this assumes the values are not actually */
621   /* inserted in yy until the next line, which is true for my implementation */
622   /* but may not always be true. */
623   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
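
/*
   Note added for exposition (not part of the original source).  For the transpose product
   the roles are reversed: B^T * xx produces contributions destined for rows owned by other
   processes, so it is computed into a->lvec and scatter-added (SCATTER_REVERSE, ADD_VALUES)
   into yy, while A^T * xx is computed into the locally owned part of yy directly; the
   scatter end then folds in the remote contributions.
*/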
626 
627 EXTERN_C_BEGIN
628 #undef __FUNCT__
629 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
630 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
631 {
632   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
633   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
634   IS         Me,Notme;
635   int        M,N,first,last,*notme,i, ierr;
636 
637   PetscFunctionBegin;
638 
639   /* Easy test: symmetric diagonal block */
640   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
641   ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
642   if (!*f) PetscFunctionReturn(0);
643 
644   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
645   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
646   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
647   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
648   for (i=0; i<first; i++) notme[i] = i;
649   for (i=last; i<M; i++) notme[i-last+first] = i;
650   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
651   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
652   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
653   Aoff = Aoffs[0];
654   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
655   Boff = Boffs[0];
656   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
657   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
658   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
659   ierr = ISDestroy(Me); CHKERRQ(ierr);
660   ierr = ISDestroy(Notme); CHKERRQ(ierr);
661 
662   PetscFunctionReturn(0);
663 }
664 EXTERN_C_END
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
668 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
669 {
670   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
671   int        ierr;
672 
673   PetscFunctionBegin;
674   /* do nondiagonal part */
675   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
676   /* send it on its way */
677   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
678   /* do local part */
679   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
680   /* receive remote parts: note this assumes the values are not actually */
681   /* inserted in zz until the next line, which is true for my implementation */
682   /* but may not always be true. */
683   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
684   PetscFunctionReturn(0);
685 }
686 
687 /*
688   This only works correctly for square matrices where the subblock A->A is the
689    diagonal block
690 */
691 #undef __FUNCT__
692 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
693 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
694 {
695   int        ierr;
696   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
697 
698   PetscFunctionBegin;
699   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
700   if (a->rstart != a->cstart || a->rend != a->cend) {
701     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
702   }
703   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatScale_MPIAIJ"
709 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
710 {
711   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
712   int        ierr;
713 
714   PetscFunctionBegin;
715   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
716   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNCT__
721 #define __FUNCT__ "MatDestroy_MPIAIJ"
722 int MatDestroy_MPIAIJ(Mat mat)
723 {
724   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
725   int        ierr;
726 
727   PetscFunctionBegin;
728 #if defined(PETSC_USE_LOG)
729   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
730 #endif
731   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
732   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
733   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
734   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
735 #if defined (PETSC_USE_CTABLE)
736   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
737 #else
738   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
739 #endif
740   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
741   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
742   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
743   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
744   ierr = PetscFree(aij);CHKERRQ(ierr);
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatView_MPIAIJ_Binary"
750 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
751 {
752   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
753   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
754   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
755   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
756   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
757   PetscScalar       *column_values;
758 
759   PetscFunctionBegin;
760   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
761   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
762   nz   = A->nz + B->nz;
763   if (rank == 0) {
764     header[0] = MAT_FILE_COOKIE;
765     header[1] = mat->M;
766     header[2] = mat->N;
767     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
768     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
769     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
770     /* get largest number of rows any processor has */
771     rlen = mat->m;
772     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
773     for (i=1; i<size; i++) {
774       rlen = PetscMax(rlen,range[i+1] - range[i]);
775     }
776   } else {
777     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
778     rlen = mat->m;
779   }
780 
781   /* load up the local row counts */
782   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
783   for (i=0; i<mat->m; i++) {
784     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
785   }
786 
787   /* store the row lengths to the file */
788   if (rank == 0) {
789     MPI_Status status;
790     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
791     for (i=1; i<size; i++) {
792       rlen = range[i+1] - range[i];
793       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
794       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
795     }
796   } else {
797     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
798   }
799   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
800 
801   /* load up the local column indices */
802   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
803   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
804   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
805   cnt  = 0;
806   for (i=0; i<mat->m; i++) {
807     for (j=B->i[i]; j<B->i[i+1]; j++) {
808       if ( (col = garray[B->j[j]]) > cstart) break;
809       column_indices[cnt++] = col;
810     }
811     for (k=A->i[i]; k<A->i[i+1]; k++) {
812       column_indices[cnt++] = A->j[k] + cstart;
813     }
814     for (; j<B->i[i+1]; j++) {
815       column_indices[cnt++] = garray[B->j[j]];
816     }
817   }
818   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
819 
820   /* store the column indices to the file */
821   if (rank == 0) {
822     MPI_Status status;
823     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
824     for (i=1; i<size; i++) {
825       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
826       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
827       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
828       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
829     }
830   } else {
831     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
832     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
833   }
834   ierr = PetscFree(column_indices);CHKERRQ(ierr);
835 
836   /* load up the local column values */
837   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
838   cnt  = 0;
839   for (i=0; i<mat->m; i++) {
840     for (j=B->i[i]; j<B->i[i+1]; j++) {
841       if ( garray[B->j[j]] > cstart) break;
842       column_values[cnt++] = B->a[j];
843     }
844     for (k=A->i[i]; k<A->i[i+1]; k++) {
845       column_values[cnt++] = A->a[k];
846     }
847     for (; j<B->i[i+1]; j++) {
848       column_values[cnt++] = B->a[j];
849     }
850   }
851   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
852 
853   /* store the column values to the file */
854   if (rank == 0) {
855     MPI_Status status;
856     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
857     for (i=1; i<size; i++) {
858       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
859       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
860       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
861       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
862     }
863   } else {
864     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
865     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
866   }
867   ierr = PetscFree(column_values);CHKERRQ(ierr);
868   PetscFunctionReturn(0);
869 }
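
/*
   Summary added for exposition (not part of the original source).  The routine above
   writes, from process 0 only, a single stream with the layout

       header: MAT_FILE_COOKIE, M, N, total nonzeros        (4 PETSC_INT)
       row lengths for all M rows                           (PETSC_INT)
       global column indices of all nonzeros, row by row    (PETSC_INT)
       values of all nonzeros, row by row                   (PETSC_SCALAR)

   Within each row the indices are emitted in increasing global order by merging the
   off-diagonal entries below cstart, then the diagonal-block entries (shifted by cstart),
   then the remaining off-diagonal entries.  Processes other than 0 ship their pieces to
   process 0 with MPI_Send.
*/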
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
873 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
874 {
875   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
876   int               ierr,rank = aij->rank,size = aij->size;
877   PetscTruth        isdraw,isascii,flg,isbinary;
878   PetscViewer       sviewer;
879   PetscViewerFormat format;
880 
881   PetscFunctionBegin;
882   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
883   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
884   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
885   if (isascii) {
886     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
887     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
888       MatInfo info;
889       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
890       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
891       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
892       if (flg) {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
894 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
895       } else {
896         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
897 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
898       }
899       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
901       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
902       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
903       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
904       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
905       PetscFunctionReturn(0);
906     } else if (format == PETSC_VIEWER_ASCII_INFO) {
907       PetscFunctionReturn(0);
908     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
909       PetscFunctionReturn(0);
910     }
911   } else if (isbinary) {
912     if (size == 1) {
913       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
914       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
915     } else {
916       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
917     }
918     PetscFunctionReturn(0);
919   } else if (isdraw) {
920     PetscDraw  draw;
921     PetscTruth isnull;
922     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
923     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
924   }
925 
926   if (size == 1) {
927     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
928     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
929   } else {
930     /* assemble the entire matrix onto first processor. */
931     Mat         A;
932     Mat_SeqAIJ *Aloc;
933     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
934     PetscScalar *a;
935 
936     if (!rank) {
937       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
938     } else {
939       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
940     }
941     PetscLogObjectParent(mat,A);
942 
943     /* copy over the A part */
944     Aloc = (Mat_SeqAIJ*)aij->A->data;
945     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
946     row = aij->rstart;
947     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
948     for (i=0; i<m; i++) {
949       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
950       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
951     }
952     aj = Aloc->j;
953     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
954 
955     /* copy over the B part */
956     Aloc = (Mat_SeqAIJ*)aij->B->data;
957     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
958     row  = aij->rstart;
959     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
960     ct   = cols;
961     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
962     for (i=0; i<m; i++) {
963       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
964       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
965     }
966     ierr = PetscFree(ct);CHKERRQ(ierr);
967     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
969     /*
970        Everyone has to call to draw the matrix since the graphics waits are
971        synchronized across all processors that share the PetscDraw object
972     */
973     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
974     if (!rank) {
975       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
976       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
977     }
978     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
979     ierr = MatDestroy(A);CHKERRQ(ierr);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatView_MPIAIJ"
986 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
987 {
988   int        ierr;
989   PetscTruth isascii,isdraw,issocket,isbinary;
990 
991   PetscFunctionBegin;
992   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
993   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
994   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
995   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
996   if (isascii || isdraw || isbinary || issocket) {
997     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
998   } else {
999     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1000   }
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatRelax_MPIAIJ"
1008 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1009 {
1010   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1011   int          ierr;
1012   Vec          bb1;
1013   PetscScalar  mone=-1.0;
1014 
1015   PetscFunctionBegin;
1016   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1017 
1018   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1019 
1020   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1021     if (flag & SOR_ZERO_INITIAL_GUESS) {
1022       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1023       its--;
1024     }
1025 
1026     while (its--) {
1027       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1028       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1029 
1030       /* update rhs: bb1 = bb - B*x */
1031       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1032       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1033 
1034       /* local sweep */
1035       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1036       CHKERRQ(ierr);
1037     }
1038   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1039     if (flag & SOR_ZERO_INITIAL_GUESS) {
1040       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1041       its--;
1042     }
1043     while (its--) {
1044       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1045       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1046 
1047       /* update rhs: bb1 = bb - B*x */
1048       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1049       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1050 
1051       /* local sweep */
1052       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1053       CHKERRQ(ierr);
1054     }
1055   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1056     if (flag & SOR_ZERO_INITIAL_GUESS) {
1057       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1058       its--;
1059     }
1060     while (its--) {
1061       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1062       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1063 
1064       /* update rhs: bb1 = bb - B*x */
1065       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1066       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1067 
1068       /* local sweep */
1069       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1070       CHKERRQ(ierr);
1071     }
1072   } else {
1073     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1074   }
1075 
1076   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
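
/*
   Note added for exposition (not part of the original source).  Each "local" sweep above
   first refreshes the ghost values of xx, forms the modified right-hand side

       bb1 = bb - B * xx_ghost

   (the VecScale by -1 folds the minus sign into mat->lvec), and then applies the sequential
   relaxation of the diagonal block A to bb1.  Truly parallel SOR, which would couple rows
   across processes within a single sweep, is not supported.
*/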
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1082 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1083 {
1084   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1085   Mat        A = mat->A,B = mat->B;
1086   int        ierr;
1087   PetscReal  isend[5],irecv[5];
1088 
1089   PetscFunctionBegin;
1090   info->block_size     = 1.0;
1091   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1092   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1093   isend[3] = info->memory;  isend[4] = info->mallocs;
1094   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1095   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1096   isend[3] += info->memory;  isend[4] += info->mallocs;
1097   if (flag == MAT_LOCAL) {
1098     info->nz_used      = isend[0];
1099     info->nz_allocated = isend[1];
1100     info->nz_unneeded  = isend[2];
1101     info->memory       = isend[3];
1102     info->mallocs      = isend[4];
1103   } else if (flag == MAT_GLOBAL_MAX) {
1104     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1105     info->nz_used      = irecv[0];
1106     info->nz_allocated = irecv[1];
1107     info->nz_unneeded  = irecv[2];
1108     info->memory       = irecv[3];
1109     info->mallocs      = irecv[4];
1110   } else if (flag == MAT_GLOBAL_SUM) {
1111     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1112     info->nz_used      = irecv[0];
1113     info->nz_allocated = irecv[1];
1114     info->nz_unneeded  = irecv[2];
1115     info->memory       = irecv[3];
1116     info->mallocs      = irecv[4];
1117   }
1118   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1119   info->fill_ratio_needed = 0;
1120   info->factor_mallocs    = 0;
1121   info->rows_global       = (double)matin->M;
1122   info->columns_global    = (double)matin->N;
1123   info->rows_local        = (double)matin->m;
1124   info->columns_local     = (double)matin->N;
1125 
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatSetOption_MPIAIJ"
1131 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1132 {
1133   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1134   int        ierr;
1135 
1136   PetscFunctionBegin;
1137   switch (op) {
1138   case MAT_NO_NEW_NONZERO_LOCATIONS:
1139   case MAT_YES_NEW_NONZERO_LOCATIONS:
1140   case MAT_COLUMNS_UNSORTED:
1141   case MAT_COLUMNS_SORTED:
1142   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1143   case MAT_KEEP_ZEROED_ROWS:
1144   case MAT_NEW_NONZERO_LOCATION_ERR:
1145   case MAT_USE_INODES:
1146   case MAT_DO_NOT_USE_INODES:
1147   case MAT_IGNORE_ZERO_ENTRIES:
1148     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1149     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1150     break;
1151   case MAT_ROW_ORIENTED:
1152     a->roworiented = PETSC_TRUE;
1153     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1154     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1155     break;
1156   case MAT_ROWS_SORTED:
1157   case MAT_ROWS_UNSORTED:
1158   case MAT_YES_NEW_DIAGONALS:
1159     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1160     break;
1161   case MAT_COLUMN_ORIENTED:
1162     a->roworiented = PETSC_FALSE;
1163     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1164     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1165     break;
1166   case MAT_IGNORE_OFF_PROC_ENTRIES:
1167     a->donotstash = PETSC_TRUE;
1168     break;
1169   case MAT_NO_NEW_DIAGONALS:
1170     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1171   default:
1172     SETERRQ(PETSC_ERR_SUP,"unknown option");
1173   }
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatGetRow_MPIAIJ"
1179 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1180 {
1181   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1182   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1183   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1184   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1185   int          *cmap,*idx_p;
1186 
1187   PetscFunctionBegin;
1188   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1189   mat->getrowactive = PETSC_TRUE;
1190 
1191   if (!mat->rowvalues && (idx || v)) {
1192     /*
1193         allocate enough space to hold information from the longest row.
1194     */
1195     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1196     int     max = 1,tmp;
1197     for (i=0; i<matin->m; i++) {
1198       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1199       if (max < tmp) { max = tmp; }
1200     }
1201     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1202     mat->rowindices = (int*)(mat->rowvalues + max);
1203   }
1204 
1205   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1206   lrow = row - rstart;
1207 
1208   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1209   if (!v)   {pvA = 0; pvB = 0;}
1210   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1211   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1212   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1213   nztot = nzA + nzB;
1214 
1215   cmap  = mat->garray;
1216   if (v  || idx) {
1217     if (nztot) {
1218       /* Sort by increasing column numbers, assuming A and B already sorted */
1219       int imark = -1;
1220       if (v) {
1221         *v = v_p = mat->rowvalues;
1222         for (i=0; i<nzB; i++) {
1223           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1224           else break;
1225         }
1226         imark = i;
1227         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1228         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1229       }
1230       if (idx) {
1231         *idx = idx_p = mat->rowindices;
1232         if (imark > -1) {
1233           for (i=0; i<imark; i++) {
1234             idx_p[i] = cmap[cworkB[i]];
1235           }
1236         } else {
1237           for (i=0; i<nzB; i++) {
1238             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1239             else break;
1240           }
1241           imark = i;
1242         }
1243         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1244         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1245       }
1246     } else {
1247       if (idx) *idx = 0;
1248       if (v)   *v   = 0;
1249     }
1250   }
1251   *nz = nztot;
1252   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1253   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1259 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1260 {
1261   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1262 
1263   PetscFunctionBegin;
1264   if (aij->getrowactive == PETSC_FALSE) {
1265     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1266   }
1267   aij->getrowactive = PETSC_FALSE;
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatNorm_MPIAIJ"
1273 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1274 {
1275   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1276   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1277   int          ierr,i,j,cstart = aij->cstart;
1278   PetscReal    sum = 0.0;
1279   PetscScalar  *v;
1280 
1281   PetscFunctionBegin;
1282   if (aij->size == 1) {
1283     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1284   } else {
1285     if (type == NORM_FROBENIUS) {
1286       v = amat->a;
1287       for (i=0; i<amat->nz; i++) {
1288 #if defined(PETSC_USE_COMPLEX)
1289         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1290 #else
1291         sum += (*v)*(*v); v++;
1292 #endif
1293       }
1294       v = bmat->a;
1295       for (i=0; i<bmat->nz; i++) {
1296 #if defined(PETSC_USE_COMPLEX)
1297         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1298 #else
1299         sum += (*v)*(*v); v++;
1300 #endif
1301       }
1302       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1303       *norm = sqrt(*norm);
1304     } else if (type == NORM_1) { /* max column norm */
1305       PetscReal *tmp,*tmp2;
1306       int    *jj,*garray = aij->garray;
1307       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1308       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1309       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1310       *norm = 0.0;
1311       v = amat->a; jj = amat->j;
1312       for (j=0; j<amat->nz; j++) {
1313         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1314       }
1315       v = bmat->a; jj = bmat->j;
1316       for (j=0; j<bmat->nz; j++) {
1317         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1318       }
1319       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1320       for (j=0; j<mat->N; j++) {
1321         if (tmp2[j] > *norm) *norm = tmp2[j];
1322       }
1323       ierr = PetscFree(tmp);CHKERRQ(ierr);
1324       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1325     } else if (type == NORM_INFINITY) { /* max row norm */
1326       PetscReal ntemp = 0.0;
1327       for (j=0; j<aij->A->m; j++) {
1328         v = amat->a + amat->i[j];
1329         sum = 0.0;
1330         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1331           sum += PetscAbsScalar(*v); v++;
1332         }
1333         v = bmat->a + bmat->i[j];
1334         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1335           sum += PetscAbsScalar(*v); v++;
1336         }
1337         if (sum > ntemp) ntemp = sum;
1338       }
1339       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1340     } else {
1341       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1342     }
1343   }
1344   PetscFunctionReturn(0);
1345 }
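
/*
   Note added for exposition (not part of the original source).  The norms above are

       NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )   - local sums of squares, MPI_SUM reduced
       NORM_1:          max_j sum_i |a_ij|        - per-column sums MPI_SUM reduced, then the max taken
       NORM_INFINITY:   max_i sum_j |a_ij|        - per-row sums computed locally, MPI_MAX reduced

   Each nonzero is visited once in the local blocks amat and bmat; only scalar or
   length-N reductions are communicated.
*/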
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatTranspose_MPIAIJ"
1349 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1350 {
1351   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1352   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1353   int          ierr;
1354   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1355   Mat          B;
1356   PetscScalar  *array;
1357 
1358   PetscFunctionBegin;
1359   if (!matout && M != N) {
1360     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1361   }
1362 
1363   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1364 
1365   /* copy over the A part */
1366   Aloc = (Mat_SeqAIJ*)a->A->data;
1367   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1368   row = a->rstart;
1369   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1370   for (i=0; i<m; i++) {
1371     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1372     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1373   }
1374   aj = Aloc->j;
1375   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1376 
1377   /* copy over the B part */
1378   Aloc = (Mat_SeqAIJ*)a->B->data;
1379   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1380   row  = a->rstart;
1381   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1382   ct   = cols;
1383   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1384   for (i=0; i<m; i++) {
1385     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1386     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1387   }
1388   ierr = PetscFree(ct);CHKERRQ(ierr);
1389   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391   if (matout) {
1392     *matout = B;
1393   } else {
1394     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1401 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1402 {
1403   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1404   Mat        a = aij->A,b = aij->B;
1405   int        ierr,s1,s2,s3;
1406 
1407   PetscFunctionBegin;
1408   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1409   if (rr) {
1410     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1411     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1412     /* Overlap communication with computation. */
1413     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1414   }
1415   if (ll) {
1416     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1417     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1418     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1419   }
1420   /* scale  the diagonal block */
1421   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1422 
1423   if (rr) {
1424     /* Do a scatter end and then right scale the off-diagonal block */
1425     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1426     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1427   }
1428 
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1435 int MatPrintHelp_MPIAIJ(Mat A)
1436 {
1437   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1438   int        ierr;
1439 
1440   PetscFunctionBegin;
1441   if (!a->rank) {
1442     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1443   }
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1449 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1450 {
1451   PetscFunctionBegin;
1452   *bs = 1;
1453   PetscFunctionReturn(0);
1454 }
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1457 int MatSetUnfactored_MPIAIJ(Mat A)
1458 {
1459   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1460   int        ierr;
1461 
1462   PetscFunctionBegin;
1463   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatEqual_MPIAIJ"
1469 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1470 {
1471   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1472   Mat        a,b,c,d;
1473   PetscTruth flg;
1474   int        ierr;
1475 
1476   PetscFunctionBegin;
1477   a = matA->A; b = matA->B;
1478   c = matB->A; d = matB->B;
1479 
1480   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1481   if (flg == PETSC_TRUE) {
1482     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1483   }
1484   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatCopy_MPIAIJ"
1490 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1491 {
1492   int        ierr;
1493   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1494   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1495 
1496   PetscFunctionBegin;
1497   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1498   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1499     /* because of the column compression in the off-processor part of the matrix a->B,
1500        the number of columns in a->B and b->B may be different, hence we cannot call
1501        MatCopy() directly on the two parts. If need be, we can provide a more
1502        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1503        and then copying the submatrices */
1504     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1505   } else {
1506     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1507     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1508   }
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1514 int MatSetUpPreallocation_MPIAIJ(Mat A)
1515 {
1516   int        ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1524 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1525 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1526 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
1527 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1528 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1529 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1530 #endif
1531 
1532 #include "petscblaslapack.h"
1533 extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure);
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatAXPY_MPIAIJ"
1536 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1537 {
1538   int        ierr,one=1,i;
1539   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1540   Mat_SeqAIJ *x,*y;
1541 
1542   PetscFunctionBegin;
1543   if (str == SAME_NONZERO_PATTERN) {
1544     x = (Mat_SeqAIJ *)xx->A->data;
1545     y = (Mat_SeqAIJ *)yy->A->data;
1546     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1547     x = (Mat_SeqAIJ *)xx->B->data;
1548     y = (Mat_SeqAIJ *)yy->B->data;
1549     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1550   } else if (str == SUBSET_NONZERO_PATTERN) {
1551     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1552 
1553     x = (Mat_SeqAIJ *)xx->B->data;
1554     y = (Mat_SeqAIJ *)yy->B->data;
1555     if (y->xtoy && y->XtoY != xx->B) {
1556       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1557       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1558     }
1559     if (!y->xtoy) { /* get xtoy */
1560       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1561       y->XtoY = xx->B;
1562     }
1563     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1564   } else {
1565     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 /* -------------------------------------------------------------------*/
1571 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1572        MatGetRow_MPIAIJ,
1573        MatRestoreRow_MPIAIJ,
1574        MatMult_MPIAIJ,
1575        MatMultAdd_MPIAIJ,
1576        MatMultTranspose_MPIAIJ,
1577        MatMultTransposeAdd_MPIAIJ,
1578        0,
1579        0,
1580        0,
1581        0,
1582        0,
1583        0,
1584        MatRelax_MPIAIJ,
1585        MatTranspose_MPIAIJ,
1586        MatGetInfo_MPIAIJ,
1587        MatEqual_MPIAIJ,
1588        MatGetDiagonal_MPIAIJ,
1589        MatDiagonalScale_MPIAIJ,
1590        MatNorm_MPIAIJ,
1591        MatAssemblyBegin_MPIAIJ,
1592        MatAssemblyEnd_MPIAIJ,
1593        0,
1594        MatSetOption_MPIAIJ,
1595        MatZeroEntries_MPIAIJ,
1596        MatZeroRows_MPIAIJ,
1597 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1598        MatLUFactorSymbolic_MPIAIJ_TFS,
1599 #else
1600        0,
1601 #endif
1602        0,
1603        0,
1604        0,
1605        MatSetUpPreallocation_MPIAIJ,
1606        0,
1607        0,
1608        0,
1609        0,
1610        MatDuplicate_MPIAIJ,
1611        0,
1612        0,
1613        0,
1614        0,
1615        MatAXPY_MPIAIJ,
1616        MatGetSubMatrices_MPIAIJ,
1617        MatIncreaseOverlap_MPIAIJ,
1618        MatGetValues_MPIAIJ,
1619        MatCopy_MPIAIJ,
1620        MatPrintHelp_MPIAIJ,
1621        MatScale_MPIAIJ,
1622        0,
1623        0,
1624        0,
1625        MatGetBlockSize_MPIAIJ,
1626        0,
1627        0,
1628        0,
1629        0,
1630        MatFDColoringCreate_MPIAIJ,
1631        0,
1632        MatSetUnfactored_MPIAIJ,
1633        0,
1634        0,
1635        MatGetSubMatrix_MPIAIJ,
1636        MatDestroy_MPIAIJ,
1637        MatView_MPIAIJ,
1638        MatGetPetscMaps_Petsc,
1639        0,
1640        0,
1641        0,
1642        0,
1643        0,
1644        0,
1645        0,
1646        0,
1647        MatSetColoring_MPIAIJ,
1648        MatSetValuesAdic_MPIAIJ,
1649        MatSetValuesAdifor_MPIAIJ
1650 };
1651 
1652 /* ----------------------------------------------------------------------------------------*/
1653 
1654 EXTERN_C_BEGIN
1655 #undef __FUNCT__
1656 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1657 int MatStoreValues_MPIAIJ(Mat mat)
1658 {
1659   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1660   int        ierr;
1661 
1662   PetscFunctionBegin;
1663   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1664   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1665   PetscFunctionReturn(0);
1666 }
1667 EXTERN_C_END
1668 
1669 EXTERN_C_BEGIN
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1672 int MatRetrieveValues_MPIAIJ(Mat mat)
1673 {
1674   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1675   int        ierr;
1676 
1677   PetscFunctionBegin;
1678   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1679   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 EXTERN_C_END
1683 
1684 #include "petscpc.h"
1685 EXTERN_C_BEGIN
1686 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1687 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec);
1688 EXTERN_C_END
1689 
1690 EXTERN_C_BEGIN
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1693 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1694 {
1695   Mat_MPIAIJ   *b;
1696   int          ierr,i;
1697 
1698   PetscFunctionBegin;
1699   B->preallocated = PETSC_TRUE;
1700   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1701   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1702   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1703   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1704   if (d_nnz) {
1705     for (i=0; i<B->m; i++) {
1706       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1707     }
1708   }
1709   if (o_nnz) {
1710     for (i=0; i<B->m; i++) {
1711       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1712     }
1713   }
1714   b = (Mat_MPIAIJ*)B->data;
1715 
1716   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1717   PetscLogObjectParent(B,b->A);
1718   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1719   PetscLogObjectParent(B,b->B);
1720 
1721   PetscFunctionReturn(0);
1722 }
1723 EXTERN_C_END
1724 
1725 EXTERN_C_BEGIN
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatCreate_MPIAIJ"
1728 int MatCreate_MPIAIJ(Mat B)
1729 {
1730   Mat_MPIAIJ *b;
1731   int        ierr,i,size;
1732 
1733   PetscFunctionBegin;
1734   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1735 
1736   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1737   B->data         = (void*)b;
1738   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1739   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1740   B->factor       = 0;
1741   B->assembled    = PETSC_FALSE;
1742   B->mapping      = 0;
1743 
1744   B->insertmode      = NOT_SET_VALUES;
1745   b->size            = size;
1746   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1747 
1748   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1749   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1750 
1751   /* the information in the maps duplicates the information computed below, eventually
1752      we should remove the duplicate information that is not contained in the maps */
1753   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1754   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1755 
1756   /* build local table of row and column ownerships */
1757   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1758   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1759   b->cowners = b->rowners + b->size + 2;
1760   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1761   b->rowners[0] = 0;
1762   for (i=2; i<=b->size; i++) {
1763     b->rowners[i] += b->rowners[i-1];
1764   }
1765   b->rstart = b->rowners[b->rank];
1766   b->rend   = b->rowners[b->rank+1];
1767   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1768   b->cowners[0] = 0;
1769   for (i=2; i<=b->size; i++) {
1770     b->cowners[i] += b->cowners[i-1];
1771   }
1772   b->cstart = b->cowners[b->rank];
1773   b->cend   = b->cowners[b->rank+1];
1774 
1775   /* build cache for off array entries formed */
1776   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1777   b->donotstash  = PETSC_FALSE;
1778   b->colmap      = 0;
1779   b->garray      = 0;
1780   b->roworiented = PETSC_TRUE;
1781 
1782   /* stuff used for matrix vector multiply */
1783   b->lvec      = PETSC_NULL;
1784   b->Mvctx     = PETSC_NULL;
1785 
1786   /* stuff for MatGetRow() */
1787   b->rowindices   = 0;
1788   b->rowvalues    = 0;
1789   b->getrowactive = PETSC_FALSE;
1790 
1791   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1792                                      "MatStoreValues_MPIAIJ",
1793                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1794   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1795                                      "MatRetrieveValues_MPIAIJ",
1796                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1797   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1798 				     "MatGetDiagonalBlock_MPIAIJ",
1799                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1800   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1801 				     "MatIsSymmetric_MPIAIJ",
1802 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1803   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1804 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1805 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1807 				     "MatDiagonalScaleLocal_MPIAIJ",
1808 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 EXTERN_C_END
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1815 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1816 {
1817   Mat        mat;
1818   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1819   int        ierr;
1820 
1821   PetscFunctionBegin;
1822   *newmat       = 0;
1823   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1824   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1825   a    = (Mat_MPIAIJ*)mat->data;
1826   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1827   mat->factor       = matin->factor;
1828   mat->assembled    = PETSC_TRUE;
1829   mat->insertmode   = NOT_SET_VALUES;
1830   mat->preallocated = PETSC_TRUE;
1831 
1832   a->rstart       = oldmat->rstart;
1833   a->rend         = oldmat->rend;
1834   a->cstart       = oldmat->cstart;
1835   a->cend         = oldmat->cend;
1836   a->size         = oldmat->size;
1837   a->rank         = oldmat->rank;
1838   a->donotstash   = oldmat->donotstash;
1839   a->roworiented  = oldmat->roworiented;
1840   a->rowindices   = 0;
1841   a->rowvalues    = 0;
1842   a->getrowactive = PETSC_FALSE;
1843 
1844   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1845   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1846   if (oldmat->colmap) {
1847 #if defined (PETSC_USE_CTABLE)
1848     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1849 #else
1850     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1851     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1852     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1853 #endif
1854   } else a->colmap = 0;
1855   if (oldmat->garray) {
1856     int len;
1857     len  = oldmat->B->n;
1858     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1859     PetscLogObjectMemory(mat,len*sizeof(int));
1860     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1861   } else a->garray = 0;
1862 
1863   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1864   PetscLogObjectParent(mat,a->lvec);
1865   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1866   PetscLogObjectParent(mat,a->Mvctx);
1867   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1868   PetscLogObjectParent(mat,a->A);
1869   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1870   PetscLogObjectParent(mat,a->B);
1871   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1872   *newmat = mat;
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #include "petscsys.h"
1877 
1878 EXTERN_C_BEGIN
1879 #undef __FUNCT__
1880 #define __FUNCT__ "MatLoad_MPIAIJ"
1881 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1882 {
1883   Mat          A;
1884   PetscScalar  *vals,*svals;
1885   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1886   MPI_Status   status;
1887   int          i,nz,ierr,j,rstart,rend,fd;
1888   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1889   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1890   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1891 
1892   PetscFunctionBegin;
1893   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1894   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1895   if (!rank) {
1896     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1897     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1898     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object in file");
1899     if (header[3] < 0) {
1900       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1901     }
1902   }
1903 
1904   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1905   M = header[1]; N = header[2];
1906   /* determine ownership of all rows */
1907   m = M/size + ((M % size) > rank);
1908   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1909   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1910   rowners[0] = 0;
1911   for (i=2; i<=size; i++) {
1912     rowners[i] += rowners[i-1];
1913   }
1914   rstart = rowners[rank];
1915   rend   = rowners[rank+1];
1916 
1917   /* distribute row lengths to all processors */
1918   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1919   offlens = ourlens + (rend-rstart);
1920   if (!rank) {
1921     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1922     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1923     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1924     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1925     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1926     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1927   } else {
1928     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1929   }
1930 
1931   if (!rank) {
1932     /* calculate the number of nonzeros on each processor */
1933     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1934     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1935     for (i=0; i<size; i++) {
1936       for (j=rowners[i]; j< rowners[i+1]; j++) {
1937         procsnz[i] += rowlengths[j];
1938       }
1939     }
1940     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1941 
1942     /* determine max buffer needed and allocate it */
1943     maxnz = 0;
1944     for (i=0; i<size; i++) {
1945       maxnz = PetscMax(maxnz,procsnz[i]);
1946     }
1947     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1948 
1949     /* read in my part of the matrix column indices  */
1950     nz   = procsnz[0];
1951     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1952     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1953 
1954     /* read in everyone else's part and ship it off */
1955     for (i=1; i<size; i++) {
1956       nz   = procsnz[i];
1957       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1958       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1959     }
1960     ierr = PetscFree(cols);CHKERRQ(ierr);
1961   } else {
1962     /* determine buffer space needed for message */
1963     nz = 0;
1964     for (i=0; i<m; i++) {
1965       nz += ourlens[i];
1966     }
1967     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1968 
1969     /* receive message of column indices*/
1970     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1971     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1972     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1973   }
1974 
1975   /* determine column ownership if matrix is not square */
1976   if (N != M) {
1977     n      = N/size + ((N % size) > rank);
1978     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1979     cstart = cend - n;
1980   } else {
1981     cstart = rstart;
1982     cend   = rend;
1983     n      = cend - cstart;
1984   }
1985 
1986   /* loop over local rows, determining number of off diagonal entries */
1987   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1988   jj = 0;
1989   for (i=0; i<m; i++) {
1990     for (j=0; j<ourlens[i]; j++) {
1991       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1992       jj++;
1993     }
1994   }
1995 
1996   /* create our matrix */
1997   for (i=0; i<m; i++) {
1998     ourlens[i] -= offlens[i];
1999   }
2000   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2001   ierr = MatSetType(A,type);CHKERRQ(ierr);
2002   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2003 
2004   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2005   for (i=0; i<m; i++) {
2006     ourlens[i] += offlens[i];
2007   }
2008 
2009   if (!rank) {
2010     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2011 
2012     /* read in my part of the matrix numerical values  */
2013     nz   = procsnz[0];
2014     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2015 
2016     /* insert into matrix */
2017     jj      = rstart;
2018     smycols = mycols;
2019     svals   = vals;
2020     for (i=0; i<m; i++) {
2021       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2022       smycols += ourlens[i];
2023       svals   += ourlens[i];
2024       jj++;
2025     }
2026 
2027     /* read in the other processors' parts and ship them out */
2028     for (i=1; i<size; i++) {
2029       nz   = procsnz[i];
2030       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2031       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2032     }
2033     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2034   } else {
2035     /* receive numeric values */
2036     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2037 
2038     /* receive message of values*/
2039     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2040     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2041     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2042 
2043     /* insert into matrix */
2044     jj      = rstart;
2045     smycols = mycols;
2046     svals   = vals;
2047     for (i=0; i<m; i++) {
2048       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2049       smycols += ourlens[i];
2050       svals   += ourlens[i];
2051       jj++;
2052     }
2053   }
2054   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2055   ierr = PetscFree(vals);CHKERRQ(ierr);
2056   ierr = PetscFree(mycols);CHKERRQ(ierr);
2057   ierr = PetscFree(rowners);CHKERRQ(ierr);
2058 
2059   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2060   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2061   *newmat = A;
2062   PetscFunctionReturn(0);
2063 }
2064 EXTERN_C_END
2065 
2066 #undef __FUNCT__
2067 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2068 /*
2069     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2070   on each process, and then the final result obtained by concatenating the local
2071   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2072 */
2073 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2074 {
2075   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2076   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2077   Mat          *local,M,Mreuse;
2078   PetscScalar  *vwork,*aa;
2079   MPI_Comm     comm = mat->comm;
2080   Mat_SeqAIJ   *aij;
2081 
2082 
2083   PetscFunctionBegin;
2084   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2085   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2086 
2087   if (call ==  MAT_REUSE_MATRIX) {
2088     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2089     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2090     local = &Mreuse;
2091     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2092   } else {
2093     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2094     Mreuse = *local;
2095     ierr   = PetscFree(local);CHKERRQ(ierr);
2096   }
2097 
2098   /*
2099       m - number of local rows
2100       n - number of columns (same on all processors)
2101       rstart - first row in new global matrix generated
2102   */
2103   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2104   if (call == MAT_INITIAL_MATRIX) {
2105     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2106     ii  = aij->i;
2107     jj  = aij->j;
2108 
2109     /*
2110         Determine the number of non-zeros in the diagonal and off-diagonal
2111         portions of the matrix in order to do correct preallocation
2112     */
2113 
2114     /* first get start and end of "diagonal" columns */
2115     if (csize == PETSC_DECIDE) {
2116       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2117       if (mglobal == n) { /* square matrix */
2118         nlocal = m;
2119       } else {
2120         nlocal = n/size + ((n % size) > rank);
2121       }
2122     } else {
2123       nlocal = csize;
2124     }
2125     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2126     rstart = rend - nlocal;
2127     if (rank == size - 1 && rend != n) {
2128       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2129     }
2130 
2131     /* next, compute all the lengths */
2132     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2133     olens = dlens + m;
2134     for (i=0; i<m; i++) {
2135       jend = ii[i+1] - ii[i];
2136       olen = 0;
2137       dlen = 0;
2138       for (j=0; j<jend; j++) {
2139         if (*jj < rstart || *jj >= rend) olen++;
2140         else dlen++;
2141         jj++;
2142       }
2143       olens[i] = olen;
2144       dlens[i] = dlen;
2145     }
2146     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2147     ierr = PetscFree(dlens);CHKERRQ(ierr);
2148   } else {
2149     int ml,nl;
2150 
2151     M = *newmat;
2152     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2153     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2154     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2155     /*
2156          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2157        rather than the slower MatSetValues().
2158     */
2159     M->was_assembled = PETSC_TRUE;
2160     M->assembled     = PETSC_FALSE;
2161   }
2162   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2163   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2164   ii  = aij->i;
2165   jj  = aij->j;
2166   aa  = aij->a;
2167   for (i=0; i<m; i++) {
2168     row   = rstart + i;
2169     nz    = ii[i+1] - ii[i];
2170     cwork = jj;     jj += nz;
2171     vwork = aa;     aa += nz;
2172     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2173   }
2174 
2175   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2176   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2177   *newmat = M;
2178 
2179   /* save the submatrix used on this processor for the next request */
2180   if (call ==  MAT_INITIAL_MATRIX) {
2181     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2182     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2183   }
2184 
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 #undef __FUNCT__
2189 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2190 /*@C
2191    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2192    (the default parallel PETSc format).  For good matrix assembly performance
2193    the user should preallocate the matrix storage by setting the parameters
2194    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2195    performance can be increased by more than a factor of 50.
2196 
2197    Collective on MPI_Comm
2198 
2199    Input Parameters:
2200 +  A - the matrix
2201 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2202            (same value is used for all local rows)
2203 .  d_nnz - array containing the number of nonzeros in the various rows of the
2204            DIAGONAL portion of the local submatrix (possibly different for each row)
2205            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2206            The size of this array is equal to the number of local rows, i.e. 'm'.
2207            You must leave room for the diagonal entry even if it is zero.
2208 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2209            submatrix (same value is used for all local rows).
2210 -  o_nnz - array containing the number of nonzeros in the various rows of the
2211            OFF-DIAGONAL portion of the local submatrix (possibly different for
2212            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2213            structure. The size of this array is equal to the number
2214            of local rows, i.e. 'm'.
2215 
2216    The AIJ format (also called the Yale sparse matrix format or
2217    compressed row storage) is fully compatible with standard Fortran 77
2218    storage.  That is, the stored row and column indices can begin at
2219    either one (as in Fortran) or zero.  See the users manual for details.
2220 
2221    The user MUST specify either the local or global matrix dimensions
2222    (possibly both).
2223 
2224    The parallel matrix is partitioned such that the first m0 rows belong to
2225    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2226    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
2227 
2228    The DIAGONAL portion of the local submatrix of a processor can be defined
2229    as the submatrix which is obtained by extracting the part corresponding
2230    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2231    first row that belongs to the processor, and r2 is the last row belonging
2232    to this processor. This is a square mxm matrix. The remaining portion
2233    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2234 
2235    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2236 
2237    By default, this format uses inodes (identical nodes) when possible.
2238    We search for consecutive rows with the same nonzero structure, thereby
2239    reusing matrix information to achieve increased efficiency.
2240 
2241    Options Database Keys:
2242 +  -mat_aij_no_inode  - Do not use inodes
2243 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2244 -  -mat_aij_oneindex - Internally use indexing starting at 1
2245         rather than 0.  Note that when calling MatSetValues(),
2246         the user still MUST index entries starting at 0!
2247 
2248    Example usage:
2249 
2250    Consider the following 8x8 matrix with 34 non-zero values, that is
2251    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2252    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2253    as follows:
2254 
2255 .vb
2256             1  2  0  |  0  3  0  |  0  4
2257     Proc0   0  5  6  |  7  0  0  |  8  0
2258             9  0 10  | 11  0  0  | 12  0
2259     -------------------------------------
2260            13  0 14  | 15 16 17  |  0  0
2261     Proc1   0 18  0  | 19 20 21  |  0  0
2262             0  0  0  | 22 23  0  | 24  0
2263     -------------------------------------
2264     Proc2  25 26 27  |  0  0 28  | 29  0
2265            30  0  0  | 31 32 33  |  0 34
2266 .ve
2267 
2268    This can be represented as a collection of submatrices as:
2269 
2270 .vb
2271       A B C
2272       D E F
2273       G H I
2274 .ve
2275 
2276    Where the submatrices A,B,C are owned by proc0, D,E,F are
2277    owned by proc1, G,H,I are owned by proc2.
2278 
2279    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2280    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2281    The 'M','N' parameters are 8,8, and have the same values on all procs.
2282 
2283    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2284    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2285    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2286    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2287    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2288    matrix, and [DF] as another SeqAIJ matrix.
2289 
2290    When d_nz, o_nz parameters are specified, d_nz storage elements are
2291    allocated for every row of the local diagonal submatrix, and o_nz
2292    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2293    One way to choose d_nz and o_nz is to use the max nonzeros per local
2294    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2295    In this case, the values of d_nz,o_nz are:
2296 .vb
2297      proc0 : d_nz = 2, o_nz = 2
2298      proc1 : d_nz = 3, o_nz = 2
2299      proc2 : d_nz = 1, o_nz = 4
2300 .ve
2301    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2302    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2303    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2304    34 values.
2305 
2306    When d_nnz, o_nnz parameters are specified, the storage is specified
2307    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2308    In the above case the values for d_nnz,o_nnz are:
2309 .vb
2310      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2311      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2312      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2313 .ve
2314    Here the space allocated is the sum of all the above values, i.e. 34, and
2315    hence pre-allocation is perfect.
2316 
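   A minimal calling sketch, following the proc0 values from the example above
   (the variables comm and ierr are assumed to be declared and set up elsewhere,
   and the names mat, d_nnz, o_nnz are purely illustrative):

.vb
     Mat mat;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* illustrative per-row counts */
     ierr = MatCreate(comm,3,3,8,8,&mat);CHKERRQ(ierr);
     ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(mat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
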
2317    Level: intermediate
2318 
2319 .keywords: matrix, aij, compressed row, sparse, parallel
2320 
2321 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2322 @*/
2323 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2324 {
2325   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2326 
2327   PetscFunctionBegin;
2328   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2329   if (f) {
2330     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2331   }
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #undef __FUNCT__
2336 #define __FUNCT__ "MatCreateMPIAIJ"
2337 /*@C
2338    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2339    (the default parallel PETSc format).  For good matrix assembly performance
2340    the user should preallocate the matrix storage by setting the parameters
2341    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2342    performance can be increased by more than a factor of 50.
2343 
2344    Collective on MPI_Comm
2345 
2346    Input Parameters:
2347 +  comm - MPI communicator
2348 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2349            This value should be the same as the local size used in creating the
2350            y vector for the matrix-vector product y = Ax.
2351 .  n - This value should be the same as the local size used in creating the
2352        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2353        calculated if N is given) For square matrices n is almost always m.
2354 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2355 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2356 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2357            (same value is used for all local rows)
2358 .  d_nnz - array containing the number of nonzeros in the various rows of the
2359            DIAGONAL portion of the local submatrix (possibly different for each row)
2360            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2361            The size of this array is equal to the number of local rows, i.e. 'm'.
2362            You must leave room for the diagonal entry even if it is zero.
2363 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2364            submatrix (same value is used for all local rows).
2365 -  o_nnz - array containing the number of nonzeros in the various rows of the
2366            OFF-DIAGONAL portion of the local submatrix (possibly different for
2367            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2368            structure. The size of this array is equal to the number
2369            of local rows, i.e. 'm'.
2370 
2371    Output Parameter:
2372 .  A - the matrix
2373 
2374    Notes:
2375    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2376    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2377    storage requirements for this matrix.
2378 
2379    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2380    processor then it must be used on all processors that share the object for
2381    that argument.
2382 
2383    The AIJ format (also called the Yale sparse matrix format or
2384    compressed row storage) is fully compatible with standard Fortran 77
2385    storage.  That is, the stored row and column indices can begin at
2386    either one (as in Fortran) or zero.  See the users manual for details.
2387 
2388    The user MUST specify either the local or global matrix dimensions
2389    (possibly both).
2390 
2391    The parallel matrix is partitioned such that the first m0 rows belong to
2392    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2393    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
2394 
2395    The DIAGONAL portion of the local submatrix of a processor can be defined
2396    as the submatrix which is obtained by extracting the part corresponding
2397    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2398    first row that belongs to the processor, and r2 is the last row belonging
2399    to this processor. This is a square mxm matrix. The remaining portion
2400    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2401 
2402    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2403 
2404    When calling this routine with a single process communicator, a matrix of
2405    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2406    type of communicator, use the construction mechanism:
2407      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2408 
2409    By default, this format uses inodes (identical nodes) when possible.
2410    We search for consecutive rows with the same nonzero structure, thereby
2411    reusing matrix information to achieve increased efficiency.
2412 
2413    Options Database Keys:
2414 +  -mat_aij_no_inode  - Do not use inodes
2415 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2416 -  -mat_aij_oneindex - Internally use indexing starting at 1
2417         rather than 0.  Note that when calling MatSetValues(),
2418         the user still MUST index entries starting at 0!
2419 
2420 
2421    Example usage:
2422 
2423    Consider the following 8x8 matrix with 34 non-zero values, that is
2424    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2425    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2426    as follows:
2427 
2428 .vb
2429             1  2  0  |  0  3  0  |  0  4
2430     Proc0   0  5  6  |  7  0  0  |  8  0
2431             9  0 10  | 11  0  0  | 12  0
2432     -------------------------------------
2433            13  0 14  | 15 16 17  |  0  0
2434     Proc1   0 18  0  | 19 20 21  |  0  0
2435             0  0  0  | 22 23  0  | 24  0
2436     -------------------------------------
2437     Proc2  25 26 27  |  0  0 28  | 29  0
2438            30  0  0  | 31 32 33  |  0 34
2439 .ve
2440 
2441    This can be represented as a collection of submatrices as:
2442 
2443 .vb
2444       A B C
2445       D E F
2446       G H I
2447 .ve
2448 
2449    Where the submatrices A,B,C are owned by proc0, D,E,F are
2450    owned by proc1, G,H,I are owned by proc2.
2451 
2452    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2453    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2454    The 'M','N' parameters are 8,8, and have the same values on all procs.
2455 
2456    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2457    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2458    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2459    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2460    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2461    matrix, and [DF] as another SeqAIJ matrix.
2462 
2463    When d_nz, o_nz parameters are specified, d_nz storage elements are
2464    allocated for every row of the local diagonal submatrix, and o_nz
2465    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2466    One way to choose d_nz and o_nz is to use the max nonzeros per local
2467    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2468    In this case, the values of d_nz,o_nz are:
2469 .vb
2470      proc0 : d_nz = 2, o_nz = 2
2471      proc1 : d_nz = 3, o_nz = 2
2472      proc2 : d_nz = 1, o_nz = 4
2473 .ve
2474    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2475    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2476    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2477    34 values.
2478 
2479    When d_nnz, o_nnz parameters are specified, the storage is specified
2480    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2481    In the above case the values for d_nnz,o_nnz are:
2482 .vb
2483      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2484      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2485      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2486 .ve
2487    Here the space allocated is the sum of all the above values, i.e. 34, and
2488    hence pre-allocation is perfect.
2489 
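   A minimal calling sketch for the same proc0 example (comm, ierr, d_nnz and
   o_nnz are assumed to be declared and set up as illustrated above; A is just a
   placeholder name):

.vb
     Mat A;
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... insert entries with MatSetValues() ... */
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
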
2490    Level: intermediate
2491 
2492 .keywords: matrix, aij, compressed row, sparse, parallel
2493 
2494 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2495 @*/
2496 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2497 {
2498   int ierr,size;
2499 
2500   PetscFunctionBegin;
2501   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2502   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2503   if (size > 1) {
2504     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2505     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2506   } else {
2507     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2508     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2509   }
2510   PetscFunctionReturn(0);
2511 }
2512 
2513 #undef __FUNCT__
2514 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
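/*
   MatMPIAIJGetSeqAIJ - gives access to the pieces of an MPIAIJ matrix: Ad is the
   "diagonal" SeqAIJ block, Ao is the "off-diagonal" SeqAIJ block, and colmap maps
   the local column numbers of Ao to global column numbers (the garray).
*/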
2515 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2516 {
2517   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2518   PetscFunctionBegin;
2519   *Ad     = a->A;
2520   *Ao     = a->B;
2521   *colmap = a->garray;
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 #undef __FUNCT__
2526 #define __FUNCT__ "MatSetColoring_MPIAIJ"
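/*
   Applies a coloring to both the diagonal and off-diagonal SeqAIJ blocks.
   For IS_COLORING_LOCAL the coloring is applied directly to the diagonal block,
   while the global colors are gathered with ISAllGatherColors() and remapped onto
   the off-diagonal columns through garray. For IS_COLORING_GHOSTED the colors for
   both blocks are remapped through the matrix's mapping with
   ISGlobalToLocalMappingApply().
*/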
2527 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2528 {
2529   int        ierr,i;
2530   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2531 
2532   PetscFunctionBegin;
2533   if (coloring->ctype == IS_COLORING_LOCAL) {
2534     ISColoringValue *allcolors,*colors;
2535     ISColoring      ocoloring;
2536 
2537     /* set coloring for diagonal portion */
2538     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2539 
2540     /* set coloring for off-diagonal portion */
2541     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2542     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2543     for (i=0; i<a->B->n; i++) {
2544       colors[i] = allcolors[a->garray[i]];
2545     }
2546     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2547     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2548     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2549     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2550   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2551     ISColoringValue *colors;
2552     int             *larray;
2553     ISColoring      ocoloring;
2554 
2555     /* set coloring for diagonal portion */
2556     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2557     for (i=0; i<a->A->n; i++) {
2558       larray[i] = i + a->cstart;
2559     }
2560     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2561     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2562     for (i=0; i<a->A->n; i++) {
2563       colors[i] = coloring->colors[larray[i]];
2564     }
2565     ierr = PetscFree(larray);CHKERRQ(ierr);
2566     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2567     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2568     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2569 
2570     /* set coloring for off-diagonal portion */
2571     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2572     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2573     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2574     for (i=0; i<a->B->n; i++) {
2575       colors[i] = coloring->colors[larray[i]];
2576     }
2577     ierr = PetscFree(larray);CHKERRQ(ierr);
2578     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2579     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2580     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2581   } else {
2582     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2583   }
2584 
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 #undef __FUNCT__
2589 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2590 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2591 {
2592   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2593   int        ierr;
2594 
2595   PetscFunctionBegin;
2596   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2597   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2603 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2604 {
2605   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2606   int        ierr;
2607 
2608   PetscFunctionBegin;
2609   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2610   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2611   PetscFunctionReturn(0);
2612 }
2613 
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatMerge"
2616 /*@C
2617       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2618                  matrices from each processor
2619 
2620     Collective on MPI_Comm
2621 
2622    Input Parameters:
2623 +    comm - the communicator the parallel matrix will live on
2624 -    inmat - the input sequential matrices
2625 
2626    Output Parameter:
2627 .    outmat - the parallel matrix generated
2628 
2629     Level: advanced
2630 
2631    Notes: The number of columns of the matrix in EACH of the separate sequential
2632       matrices MUST be the same. The input matrix is destroyed by this routine.
2633 
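   A minimal usage sketch (seqmat and parmat are illustrative names; each process
   is assumed to have assembled its own sequential matrix seqmat beforehand):

.vb
     Mat seqmat,parmat;
     /* ... each process builds and assembles its own seqmat ... */
     ierr = MatMerge(PETSC_COMM_WORLD,seqmat,&parmat);CHKERRQ(ierr);
     /* seqmat has been destroyed by MatMerge(); use parmat from here on */
.ve
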
2634 @*/
2635 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2636 {
2637   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2638   PetscScalar *values;
2639   PetscMap    columnmap,rowmap;
2640 
2641   PetscFunctionBegin;
2642 
2643   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2644 
2645   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2646   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2647   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2648   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2649   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2650   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2651 
2652   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2653   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2654   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2655   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2656   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2657 
2658   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2659   for (i=0;i<m;i++) {
2660     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2661     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2662     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2663   }
2664   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2665   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2666 
2667   for (i=0;i<m;i++) {
2668     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2669     I    = i + rstart;
2670     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2671     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2672   }
2673   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2674   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2675   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2676 
2677   PetscFunctionReturn(0);
2678 }
2679 
2680 #undef __FUNCT__
2681 #define __FUNCT__ "MatFileSplit"
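/*
   Writes the locally owned rows of A as a sequential matrix to the binary file
   <outfile>.<rank>, producing one file per process.
*/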
2682 int MatFileSplit(Mat A,char *outfile)
2683 {
2684   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2685   PetscViewer out;
2686   char        *name;
2687   Mat         B;
2688   PetscScalar *values;
2689 
2690   PetscFunctionBegin;
2691 
2692   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2693   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2694   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2695   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2696   for (i=0;i<m;i++) {
2697     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2698     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2699     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2700   }
2701   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2702   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2703 
2704   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2705   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2706   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2707   sprintf(name,"%s.%d",outfile,rank);
2708   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2709   ierr = PetscFree(name);CHKERRQ(ierr);
2710   ierr = MatView(B,out);CHKERRQ(ierr);
2711   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2712   ierr = MatDestroy(B);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715