/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table lookup cost; without it, it is not scalable
  (each processor stores an order N integer array) but lookups are fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
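  /* aij->garray[i] is the global column number of local column i of the
     off-diagonal block B; colmap inverts that map.  Entries are stored
     shifted by one so that a lookup result of 0 means "not present". */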
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE   15
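/*
   Insert one value into one row of the "diagonal" block A: the column is
   located with a truncated binary search followed by a short linear scan;
   if the row is already full, the CSR arrays are reallocated with CHUNKSIZE
   extra slots for this row and the later entries are shifted up by one.
*/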
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

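/*
   Identical insertion logic to the A macro above, applied to the
   off-diagonal block B (whose columns use compacted local numbering
   once the matrix has been assembled).
*/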
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Some of the variables below are required by the insertion macros above */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift = 0;
  PetscScalar  *ap;

  PetscFunctionBegin;
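  /* Each (im[i],in[j]) entry is routed one of three ways: into the diagonal
     block A when both indices are owned locally, into the off-diagonal block B
     when only the row is local, or into the stash for later communication when
     the row belongs to another process. */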
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
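          /* After the first assembly the off-diagonal block uses compacted
             local column numbering, so translate the global column through
             colmap; a miss means a brand-new off-process column, which (when
             new nonzeros are allowed) forces the matrix back into global
             numbering via DisAssemble_MPIAIJ(). */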
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
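  /* Only locally owned rows can be read back.  Diagonal-block columns are
     fetched from A directly; other columns are translated through colmap and
     fetched from B, with entries absent from B reported as 0.0. */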
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode; since
     the two modes are distinct bit values, a bitwise OR across the communicator
     detects when both were used */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
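  /* Drain the stash: each incoming message holds (row,col,value) triples from
     other processes, and consecutive entries sharing a row are batched into a
     single MatSetValues_MPIAIJ() call.  Then assemble A, decide whether any
     process disassembled (in which case everyone must follow suit), set up the
     scatter context on first final assembly, and finally assemble B. */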
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
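  /* The row indices may name rows owned by any process, so first run a
     rendezvous: count how many indices go to each owner, exchange the message
     counts and maximum message length with PetscMaxSum(), post the receives,
     ship each index to its owner, and only then zero the local rows. */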
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* convert the received global row numbers into local row numbers */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, thus generating lots of mallocs and slowing
     things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
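  /* y = A_diag x_local + B_offdiag x_ghost: the scatter that gathers the
     ghost values into a->lvec is overlapped with the local diagonal-block
     multiply, then the off-diagonal contribution is added in. */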
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into yy until VecScatterEnd() below, which holds for this
     implementation but may not hold for every VecScatter implementation */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsSymmetric_MPIAIJ"
int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat        Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  MatType    type;
  IS         Me,Notme;
  int        M,N,first,last,*notme,i,ierr;

  PetscFunctionBegin;
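  /* Symmetry is checked in two stages: the locally owned diagonal blocks of
     Amat and Bmat are compared first, and only if those match are the
     off-process couplings Amat(me,notme) and Bmat(notme,me) extracted as
     submatrices and compared. */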

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsSymmetric(Adia,Bdia,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsSymmetric(Aoff,Boff,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into zz until VecScatterEnd() below, which holds for this
     implementation but may not hold for every VecScatter implementation */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
extern int MatFactorInfo_Spooles(Mat,PetscViewer);
extern int MatFactorInfo_MUMPS(Mat,PetscViewer);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the most heavily loaded process */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
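  /* For each row emit the global column indices in ascending order: first the
     off-diagonal columns that lie to the left of the diagonal block, then the
     diagonal-block columns (shifted by cstart), then the remaining
     off-diagonal columns; garray translates B's local columns to global. */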
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

896 
897 #undef __FUNCT__
898 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
899 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
900 {
901   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
902   int               ierr,rank = aij->rank,size = aij->size;
903   PetscTruth        isdraw,isascii,flg,isbinary;
904   PetscViewer       sviewer;
905   PetscViewerFormat format;
906 
907   PetscFunctionBegin;
908   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
909   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
910   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
911   if (isascii) {
912     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
913     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
914       MatInfo info;
915       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
916       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
917       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
918       if (flg) {
919         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
920 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
921       } else {
922         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
923 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
924       }
925       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
926       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
927       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
928       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
929       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
930       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
931       PetscFunctionReturn(0);
932     } else if (format == PETSC_VIEWER_ASCII_INFO) {
933       PetscFunctionReturn(0);
934     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
935 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
936       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
937 #endif
938 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
939       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
940 #endif
941 #if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE)
942       ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr);
943 #endif
944       PetscFunctionReturn(0);
945     }
946   } else if (isbinary) {
947     if (size == 1) {
948       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
949       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
950     } else {
951       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
952     }
953     PetscFunctionReturn(0);
954   } else if (isdraw) {
955     PetscDraw  draw;
956     PetscTruth isnull;
957     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
958     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
959   }
960 
961   if (size == 1) {
962     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
963     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
964   } else {
965     /* assemble the entire matrix onto first processor. */
966     Mat         A;
967     Mat_SeqAIJ *Aloc;
968     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
969     PetscScalar *a;
970 
971     if (!rank) {
972       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
973     } else {
974       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
975     }
976     PetscLogObjectParent(mat,A);
977 
978     /* copy over the A part */
979     Aloc = (Mat_SeqAIJ*)aij->A->data;
980     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
981     row = aij->rstart;
982     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
983     for (i=0; i<m; i++) {
984       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
985       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
986     }
987     aj = Aloc->j;
988     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
989 
990     /* copy over the B part */
991     Aloc = (Mat_SeqAIJ*)aij->B->data;
992     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
993     row  = aij->rstart;
994     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
995     ct   = cols;
996     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
997     for (i=0; i<m; i++) {
998       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
999       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1000     }
1001     ierr = PetscFree(ct);CHKERRQ(ierr);
1002     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1003     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1004     /*
1005        Everyone has to call to draw the matrix since the graphics waits are
1006        synchronized across all processors that share the PetscDraw object
1007     */
1008     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1009     if (!rank) {
1010       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1011       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1012     }
1013     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1014     ierr = MatDestroy(A);CHKERRQ(ierr);
1015   }
1016   PetscFunctionReturn(0);
1017 }
1018 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  int          ierr;
  Vec          bb1;
  PetscScalar  mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

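  /* Only processor-local SOR is supported: each outer iteration refreshes the
     ghost values of xx, folds the off-diagonal coupling into the right-hand
     side as bb1 = bb - B*x_ghost, and then sweeps the local diagonal block. */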
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int          *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
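  /* Merge the two row fragments into global column order: B's entries with
     global column below cstart come first, then all of A's entries (offset by
     cstart), then B's remaining entries; imark records where B's row crosses
     the diagonal block. */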
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatNorm_MPIAIJ"
1308 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1309 {
1310   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1311   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1312   int          ierr,i,j,cstart = aij->cstart;
1313   PetscReal    sum = 0.0;
1314   PetscScalar  *v;
1315 
1316   PetscFunctionBegin;
1317   if (aij->size == 1) {
1318     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1319   } else {
1320     if (type == NORM_FROBENIUS) {
1321       v = amat->a;
1322       for (i=0; i<amat->nz; i++) {
1323 #if defined(PETSC_USE_COMPLEX)
1324         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1325 #else
1326         sum += (*v)*(*v); v++;
1327 #endif
1328       }
1329       v = bmat->a;
1330       for (i=0; i<bmat->nz; i++) {
1331 #if defined(PETSC_USE_COMPLEX)
1332         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1333 #else
1334         sum += (*v)*(*v); v++;
1335 #endif
1336       }
1337       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1338       *norm = sqrt(*norm);
1339     } else if (type == NORM_1) { /* max column norm */
1340       PetscReal *tmp,*tmp2;
1341       int    *jj,*garray = aij->garray;
1342       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1343       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1344       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1345       *norm = 0.0;
1346       v = amat->a; jj = amat->j;
1347       for (j=0; j<amat->nz; j++) {
1348         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1349       }
1350       v = bmat->a; jj = bmat->j;
1351       for (j=0; j<bmat->nz; j++) {
1352         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1353       }
1354       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1355       for (j=0; j<mat->N; j++) {
1356         if (tmp2[j] > *norm) *norm = tmp2[j];
1357       }
1358       ierr = PetscFree(tmp);CHKERRQ(ierr);
1359       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1360     } else if (type == NORM_INFINITY) { /* max row norm */
1361       PetscReal ntemp = 0.0;
1362       for (j=0; j<aij->A->m; j++) {
1363         v = amat->a + amat->i[j];
1364         sum = 0.0;
1365         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1366           sum += PetscAbsScalar(*v); v++;
1367         }
1368         v = bmat->a + bmat->i[j];
1369         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1370           sum += PetscAbsScalar(*v); v++;
1371         }
1372         if (sum > ntemp) ntemp = sum;
1373       }
1374       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1375     } else {
      SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1377     }
1378   }
1379   PetscFunctionReturn(0);
1380 }
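
/*
   A minimal usage sketch for the norm routine above, through the public
   MatNorm() interface; A is assumed to be an assembled MPIAIJ matrix.
   All three variants are collective; the two norm is not supported.

     PetscReal nrm1,nrmf,nrmi;
     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);          max column sum
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);  sqrt of the sum of squares
     ierr = MatNorm(A,NORM_INFINITY,&nrmi);CHKERRQ(ierr);   max row sum
*/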
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatTranspose_MPIAIJ"
1384 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1385 {
1386   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1387   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1388   int          ierr;
1389   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1390   Mat          B;
1391   PetscScalar  *array;
1392 
1393   PetscFunctionBegin;
1394   if (!matout && M != N) {
1395     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1396   }
1397 
1398   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1399 
1400   /* copy over the A part */
1401   Aloc = (Mat_SeqAIJ*)a->A->data;
1402   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1403   row = a->rstart;
1404   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1405   for (i=0; i<m; i++) {
1406     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1407     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1408   }
1409   aj = Aloc->j;
1410   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1411 
1412   /* copy over the B part */
1413   Aloc = (Mat_SeqAIJ*)a->B->data;
1414   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1415   row  = a->rstart;
1416   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1417   ct   = cols;
1418   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1419   for (i=0; i<m; i++) {
1420     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1421     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1422   }
1423   ierr = PetscFree(ct);CHKERRQ(ierr);
1424   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1425   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1426   if (matout) {
1427     *matout = B;
1428   } else {
1429     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1430   }
1431   PetscFunctionReturn(0);
1432 }
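
/*
   A minimal usage sketch for the transpose routine above, assuming an
   assembled MPIAIJ matrix A. Passing PETSC_NULL as the output requests the
   in-place variant, which (as checked above) requires a square matrix.

     Mat At;
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(At);CHKERRQ(ierr);
*/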
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1436 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1437 {
1438   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1439   Mat        a = aij->A,b = aij->B;
1440   int        ierr,s1,s2,s3;
1441 
1442   PetscFunctionBegin;
1443   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1444   if (rr) {
1445     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1446     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1447     /* Overlap communication with computation. */
1448     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1449   }
1450   if (ll) {
1451     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1452     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1453     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1454   }
  /* scale the diagonal block */
1456   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1457 
1458   if (rr) {
1459     /* Do a scatter end and then right scale the off-diagonal block */
1460     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1461     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1462   }
1463 
1464   PetscFunctionReturn(0);
1465 }
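
/*
   A minimal usage sketch: compute mat = diag(ll)*mat*diag(rr), assuming ll
   and rr are parallel vectors conforming to the local row and column sizes
   of mat. Either argument may be PETSC_NULL to skip scaling on that side.

     ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);
*/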
1466 
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1470 int MatPrintHelp_MPIAIJ(Mat A)
1471 {
1472   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1473   int        ierr;
1474 
1475   PetscFunctionBegin;
1476   if (!a->rank) {
1477     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1484 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1485 {
1486   PetscFunctionBegin;
1487   *bs = 1;
1488   PetscFunctionReturn(0);
1489 }

#undef __FUNCT__
1491 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1492 int MatSetUnfactored_MPIAIJ(Mat A)
1493 {
1494   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1495   int        ierr;
1496 
1497   PetscFunctionBegin;
1498   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatEqual_MPIAIJ"
1504 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1505 {
1506   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1507   Mat        a,b,c,d;
1508   PetscTruth flg;
1509   int        ierr;
1510 
1511   PetscFunctionBegin;
1512   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1513   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1514   a = matA->A; b = matA->B;
1515   c = matB->A; d = matB->B;
1516 
1517   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1518   if (flg == PETSC_TRUE) {
1519     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1520   }
1521   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatCopy_MPIAIJ"
1527 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1528 {
1529   int        ierr;
1530   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1531   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1532   PetscTruth flg;
1533 
1534   PetscFunctionBegin;
1535   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1536   if (str != SAME_NONZERO_PATTERN || !flg) {
    /* Because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may differ, hence we cannot call
       MatCopy() directly on the two parts. If need be, we could provide a copy more
       efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
       then copying the submatrices. */
1542     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1543   } else {
1544     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1545     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1546   }
1547   PetscFunctionReturn(0);
1548 }
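
/*
   A minimal usage sketch, assuming B was already created with the same
   parallel layout as A. With SAME_NONZERO_PATTERN the fast path above is
   taken (the two sequential parts are copied directly); any other structure
   flag falls back to MatCopy_Basic().

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/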
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1552 int MatSetUpPreallocation_MPIAIJ(Mat A)
1553 {
1554   int        ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1562 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1563 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
1566 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1567 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1568 #endif
1569 
1570 #include "petscblaslapack.h"
1571 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatAXPY_MPIAIJ"
1574 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1575 {
1576   int        ierr,one=1,i;
1577   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1578   Mat_SeqAIJ *x,*y;
1579 
1580   PetscFunctionBegin;
1581   if (str == SAME_NONZERO_PATTERN) {
1582     x = (Mat_SeqAIJ *)xx->A->data;
1583     y = (Mat_SeqAIJ *)yy->A->data;
1584     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1585     x = (Mat_SeqAIJ *)xx->B->data;
1586     y = (Mat_SeqAIJ *)yy->B->data;
1587     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1588   } else if (str == SUBSET_NONZERO_PATTERN) {
1589     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1590 
1591     x = (Mat_SeqAIJ *)xx->B->data;
1592     y = (Mat_SeqAIJ *)yy->B->data;
1593     if (y->xtoy && y->XtoY != xx->B) {
1594       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1595       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1596     }
1597     if (!y->xtoy) { /* get xtoy */
1598       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1599       y->XtoY = xx->B;
1600     }
1601     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1602   } else {
1603     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1604   }
1605   PetscFunctionReturn(0);
1606 }
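
/*
   A minimal usage sketch for this era's MatAXPY() interface (note that the
   scalar is passed by pointer): computes Y = alpha*X + Y, assuming X and Y
   are assembled MPIAIJ matrices with the same dimensions and, for the fast
   path above, the same nonzero pattern.

     PetscScalar alpha = 2.0;
     ierr = MatAXPY(&alpha,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/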
1607 
1608 /* -------------------------------------------------------------------*/
1609 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1610        MatGetRow_MPIAIJ,
1611        MatRestoreRow_MPIAIJ,
1612        MatMult_MPIAIJ,
1613        MatMultAdd_MPIAIJ,
1614        MatMultTranspose_MPIAIJ,
1615        MatMultTransposeAdd_MPIAIJ,
1616        0,
1617        0,
1618        0,
1619        0,
1620        0,
1621        0,
1622        MatRelax_MPIAIJ,
1623        MatTranspose_MPIAIJ,
1624        MatGetInfo_MPIAIJ,
1625        MatEqual_MPIAIJ,
1626        MatGetDiagonal_MPIAIJ,
1627        MatDiagonalScale_MPIAIJ,
1628        MatNorm_MPIAIJ,
1629        MatAssemblyBegin_MPIAIJ,
1630        MatAssemblyEnd_MPIAIJ,
1631        0,
1632        MatSetOption_MPIAIJ,
1633        MatZeroEntries_MPIAIJ,
1634        MatZeroRows_MPIAIJ,
1635 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1636        MatLUFactorSymbolic_MPIAIJ_TFS,
1637 #else
1638        0,
1639 #endif
1640        0,
1641        0,
1642        0,
1643        MatSetUpPreallocation_MPIAIJ,
1644        0,
1645        0,
1646        0,
1647        0,
1648        MatDuplicate_MPIAIJ,
1649        0,
1650        0,
1651        0,
1652        0,
1653        MatAXPY_MPIAIJ,
1654        MatGetSubMatrices_MPIAIJ,
1655        MatIncreaseOverlap_MPIAIJ,
1656        MatGetValues_MPIAIJ,
1657        MatCopy_MPIAIJ,
1658        MatPrintHelp_MPIAIJ,
1659        MatScale_MPIAIJ,
1660        0,
1661        0,
1662        0,
1663        MatGetBlockSize_MPIAIJ,
1664        0,
1665        0,
1666        0,
1667        0,
1668        MatFDColoringCreate_MPIAIJ,
1669        0,
1670        MatSetUnfactored_MPIAIJ,
1671        0,
1672        0,
1673        MatGetSubMatrix_MPIAIJ,
1674        MatDestroy_MPIAIJ,
1675        MatView_MPIAIJ,
1676        MatGetPetscMaps_Petsc,
1677        0,
1678        0,
1679        0,
1680        0,
1681        0,
1682        0,
1683        0,
1684        0,
1685        MatSetColoring_MPIAIJ,
1686        MatSetValuesAdic_MPIAIJ,
1687        MatSetValuesAdifor_MPIAIJ
1688 };
1689 
1690 /* ----------------------------------------------------------------------------------------*/
1691 
1692 EXTERN_C_BEGIN
1693 #undef __FUNCT__
1694 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1695 int MatStoreValues_MPIAIJ(Mat mat)
1696 {
1697   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1698   int        ierr;
1699 
1700   PetscFunctionBegin;
1701   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1702   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 EXTERN_C_END
1706 
1707 EXTERN_C_BEGIN
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1710 int MatRetrieveValues_MPIAIJ(Mat mat)
1711 {
1712   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1713   int        ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1717   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1718   PetscFunctionReturn(0);
1719 }
1720 EXTERN_C_END
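
/*
   A minimal usage sketch for the store/retrieve pair above, assuming the
   matrix keeps a fixed nonzero pattern (no new nonzero locations may be
   created once the values have been stored):

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);       save the current values
     ... overwrite the matrix entries, e.g. in a nonlinear solve ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);    restore the saved values
*/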
1721 
1722 #include "petscpc.h"
1723 EXTERN_C_BEGIN
1724 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1725 EXTERN_C_END
1726 
1727 EXTERN_C_BEGIN
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1730 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1731 {
  Mat_MPIAIJ   *b;
  int          ierr,i;
1735 
1736   PetscFunctionBegin;
1737   B->preallocated = PETSC_TRUE;
1738   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1739   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1740   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1741   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1742   if (d_nnz) {
1743     for (i=0; i<B->m; i++) {
1744       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1745     }
1746   }
1747   if (o_nnz) {
1748     for (i=0; i<B->m; i++) {
1749       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1750     }
1751   }
1752   b = (Mat_MPIAIJ*)B->data;
1753 
1754   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1755   PetscLogObjectParent(B,b->A);
1756   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1757   PetscLogObjectParent(B,b->B);
1758 
1759   PetscFunctionReturn(0);
1760 }
1761 EXTERN_C_END
1762 
1763 EXTERN_C_BEGIN
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatCreate_MPIAIJ"
1766 int MatCreate_MPIAIJ(Mat B)
1767 {
1768   Mat_MPIAIJ *b;
1769   int        ierr,i,size;
1770 
1771   PetscFunctionBegin;
1772   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1773 
1774   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1775   B->data         = (void*)b;
1776   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1777   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1778   B->factor       = 0;
1779   B->assembled    = PETSC_FALSE;
1780   B->mapping      = 0;
1781 
1782   B->insertmode      = NOT_SET_VALUES;
1783   b->size            = size;
1784   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1785 
1786   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1787   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1788 
1789   /* the information in the maps duplicates the information computed below, eventually
1790      we should remove the duplicate information that is not contained in the maps */
1791   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1792   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1793 
1794   /* build local table of row and column ownerships */
1795   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1796   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1797   b->cowners = b->rowners + b->size + 2;
1798   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1799   b->rowners[0] = 0;
1800   for (i=2; i<=b->size; i++) {
1801     b->rowners[i] += b->rowners[i-1];
1802   }
1803   b->rstart = b->rowners[b->rank];
1804   b->rend   = b->rowners[b->rank+1];
1805   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1806   b->cowners[0] = 0;
1807   for (i=2; i<=b->size; i++) {
1808     b->cowners[i] += b->cowners[i-1];
1809   }
1810   b->cstart = b->cowners[b->rank];
1811   b->cend   = b->cowners[b->rank+1];
1812 
1813   /* build cache for off array entries formed */
1814   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1815   b->donotstash  = PETSC_FALSE;
1816   b->colmap      = 0;
1817   b->garray      = 0;
1818   b->roworiented = PETSC_TRUE;
1819 
1820   /* stuff used for matrix vector multiply */
1821   b->lvec      = PETSC_NULL;
1822   b->Mvctx     = PETSC_NULL;
1823 
1824   /* stuff for MatGetRow() */
1825   b->rowindices   = 0;
1826   b->rowvalues    = 0;
1827   b->getrowactive = PETSC_FALSE;
1828 
1829   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1830                                      "MatStoreValues_MPIAIJ",
1831                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1833                                      "MatRetrieveValues_MPIAIJ",
1834                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
                                     "MatIsSymmetric_MPIAIJ",
                                     MatIsSymmetric_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 EXTERN_C_END
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1850 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1851 {
1852   Mat        mat;
1853   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1854   int        ierr;
1855 
1856   PetscFunctionBegin;
1857   *newmat       = 0;
1858   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1859   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1860   a    = (Mat_MPIAIJ*)mat->data;
1861   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1862   mat->factor       = matin->factor;
1863   mat->assembled    = PETSC_TRUE;
1864   mat->insertmode   = NOT_SET_VALUES;
1865   mat->preallocated = PETSC_TRUE;
1866 
1867   a->rstart       = oldmat->rstart;
1868   a->rend         = oldmat->rend;
1869   a->cstart       = oldmat->cstart;
1870   a->cend         = oldmat->cend;
1871   a->size         = oldmat->size;
1872   a->rank         = oldmat->rank;
1873   a->donotstash   = oldmat->donotstash;
1874   a->roworiented  = oldmat->roworiented;
1875   a->rowindices   = 0;
1876   a->rowvalues    = 0;
1877   a->getrowactive = PETSC_FALSE;
1878 
1879   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1880   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1881   if (oldmat->colmap) {
1882 #if defined (PETSC_USE_CTABLE)
1883     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1884 #else
1885     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1886     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1887     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1888 #endif
1889   } else a->colmap = 0;
1890   if (oldmat->garray) {
1891     int len;
1892     len  = oldmat->B->n;
1893     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1894     PetscLogObjectMemory(mat,len*sizeof(int));
1895     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1896   } else a->garray = 0;
1897 
1898   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1899   PetscLogObjectParent(mat,a->lvec);
1900   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1901   PetscLogObjectParent(mat,a->Mvctx);
1902   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1903   PetscLogObjectParent(mat,a->A);
1904   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1905   PetscLogObjectParent(mat,a->B);
1906   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1907   *newmat = mat;
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #include "petscsys.h"
1912 
1913 EXTERN_C_BEGIN
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatLoad_MPIAIJ"
1916 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1917 {
1918   Mat          A;
1919   PetscScalar  *vals,*svals;
1920   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1921   MPI_Status   status;
1922   int          i,nz,ierr,j,rstart,rend,fd;
1923   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1924   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1925   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1926 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
1927   PetscTruth   flag;
1928 #endif
1929 
1930   PetscFunctionBegin;
1931   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1932   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1933   if (!rank) {
1934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1935     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1936     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1937     if (header[3] < 0) {
1938       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1939     }
1940   }
1941 
1942   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1943   M = header[1]; N = header[2];
1944   /* determine ownership of all rows */
1945   m = M/size + ((M % size) > rank);
1946   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1947   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1948   rowners[0] = 0;
1949   for (i=2; i<=size; i++) {
1950     rowners[i] += rowners[i-1];
1951   }
1952   rstart = rowners[rank];
1953   rend   = rowners[rank+1];
1954 
1955   /* distribute row lengths to all processors */
1956   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1957   offlens = ourlens + (rend-rstart);
1958   if (!rank) {
1959     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1960     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1961     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1962     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1963     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1964     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1965   } else {
1966     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1967   }
1968 
1969   if (!rank) {
1970     /* calculate the number of nonzeros on each processor */
1971     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1972     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1973     for (i=0; i<size; i++) {
1974       for (j=rowners[i]; j< rowners[i+1]; j++) {
1975         procsnz[i] += rowlengths[j];
1976       }
1977     }
1978     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1979 
1980     /* determine max buffer needed and allocate it */
1981     maxnz = 0;
1982     for (i=0; i<size; i++) {
1983       maxnz = PetscMax(maxnz,procsnz[i]);
1984     }
1985     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1986 
1987     /* read in my part of the matrix column indices  */
1988     nz   = procsnz[0];
1989     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1990     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1991 
    /* read in everyone else's part and ship it off */
1993     for (i=1; i<size; i++) {
1994       nz   = procsnz[i];
1995       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1996       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1997     }
1998     ierr = PetscFree(cols);CHKERRQ(ierr);
1999   } else {
2000     /* determine buffer space needed for message */
2001     nz = 0;
2002     for (i=0; i<m; i++) {
2003       nz += ourlens[i];
2004     }
2005     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2006 
    /* receive message of column indices */
2008     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2009     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2011   }
2012 
2013   /* determine column ownership if matrix is not square */
2014   if (N != M) {
2015     n      = N/size + ((N % size) > rank);
2016     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2017     cstart = cend - n;
2018   } else {
2019     cstart = rstart;
2020     cend   = rend;
2021     n      = cend - cstart;
2022   }
2023 
2024   /* loop over local rows, determining number of off diagonal entries */
2025   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2026   jj = 0;
2027   for (i=0; i<m; i++) {
2028     for (j=0; j<ourlens[i]; j++) {
2029       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2030       jj++;
2031     }
2032   }
2033 
2034   /* create our matrix */
2035   for (i=0; i<m; i++) {
2036     ourlens[i] -= offlens[i];
2037   }
2038   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2039   A = *newmat;
2040   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2041   for (i=0; i<m; i++) {
2042     ourlens[i] += offlens[i];
2043   }
2044 
2045   if (!rank) {
2046     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2047 
2048     /* read in my part of the matrix numerical values  */
2049     nz   = procsnz[0];
2050     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2051 
2052     /* insert into matrix */
2053     jj      = rstart;
2054     smycols = mycols;
2055     svals   = vals;
2056     for (i=0; i<m; i++) {
2057       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2058       smycols += ourlens[i];
2059       svals   += ourlens[i];
2060       jj++;
2061     }
2062 
    /* read in the other processors' parts and ship them out */
2064     for (i=1; i<size; i++) {
2065       nz   = procsnz[i];
2066       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2067       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2068     }
2069     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2070   } else {
2071     /* receive numeric values */
2072     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2073 
    /* receive message of values */
2075     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2076     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2078 
2079     /* insert into matrix */
2080     jj      = rstart;
2081     smycols = mycols;
2082     svals   = vals;
2083     for (i=0; i<m; i++) {
2084       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2085       smycols += ourlens[i];
2086       svals   += ourlens[i];
2087       jj++;
2088     }
2089   }
2090   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2091   ierr = PetscFree(vals);CHKERRQ(ierr);
2092   ierr = PetscFree(mycols);CHKERRQ(ierr);
2093   ierr = PetscFree(rowners);CHKERRQ(ierr);
2094 
2095   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2096   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2097 #if defined(PETSC_HAVE_SPOOLES)
2098   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2099   if (flag) {
2100     if (size == 1) {
2101       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2102     } else {
2103       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2104     }
2105   }
2106 #endif
2107 #if defined(PETSC_HAVE_SUPERLUDIST)
2108   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2109   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2110 #endif
2111 #if defined(PETSC_HAVE_MUMPS)
2112   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
2113   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
2114 #endif
2115   PetscFunctionReturn(0);
2116 }
2117 EXTERN_C_END
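
/*
   A minimal usage sketch for the loader above, assuming "matrix.dat" was
   written with MatView() on a binary viewer:

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/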
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  matrix on each process, and then the final result formed by concatenating
  the local matrices. Writing it directly would be much like
  MatGetSubMatrices_MPIAIJ().
*/
2126 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2127 {
2128   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2129   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2130   Mat          *local,M,Mreuse;
2131   PetscScalar  *vwork,*aa;
2132   MPI_Comm     comm = mat->comm;
2133   Mat_SeqAIJ   *aij;
2134 
2135 
2136   PetscFunctionBegin;
2137   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2139 
2140   if (call ==  MAT_REUSE_MATRIX) {
2141     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2142     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2143     local = &Mreuse;
2144     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2145   } else {
2146     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2147     Mreuse = *local;
2148     ierr   = PetscFree(local);CHKERRQ(ierr);
2149   }
2150 
2151   /*
2152       m - number of local rows
2153       n - number of columns (same on all processors)
2154       rstart - first row in new global matrix generated
2155   */
2156   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2157   if (call == MAT_INITIAL_MATRIX) {
2158     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2159     ii  = aij->i;
2160     jj  = aij->j;
2161 
2162     /*
2163         Determine the number of non-zeros in the diagonal and off-diagonal
2164         portions of the matrix in order to do correct preallocation
2165     */
2166 
2167     /* first get start and end of "diagonal" columns */
2168     if (csize == PETSC_DECIDE) {
2169       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2170       if (mglobal == n) { /* square matrix */
        nlocal = m;
2172       } else {
2173         nlocal = n/size + ((n % size) > rank);
2174       }
2175     } else {
2176       nlocal = csize;
2177     }
2178     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2179     rstart = rend - nlocal;
2180     if (rank == size - 1 && rend != n) {
2181       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2182     }
2183 
2184     /* next, compute all the lengths */
2185     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2186     olens = dlens + m;
2187     for (i=0; i<m; i++) {
2188       jend = ii[i+1] - ii[i];
2189       olen = 0;
2190       dlen = 0;
2191       for (j=0; j<jend; j++) {
2192         if (*jj < rstart || *jj >= rend) olen++;
2193         else dlen++;
2194         jj++;
2195       }
2196       olens[i] = olen;
2197       dlens[i] = dlen;
2198     }
2199     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2200     ierr = PetscFree(dlens);CHKERRQ(ierr);
2201   } else {
2202     int ml,nl;
2203 
2204     M = *newmat;
2205     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2206     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2207     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2208     /*
2209          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2210        rather than the slower MatSetValues().
2211     */
2212     M->was_assembled = PETSC_TRUE;
2213     M->assembled     = PETSC_FALSE;
2214   }
2215   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2216   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2217   ii  = aij->i;
2218   jj  = aij->j;
2219   aa  = aij->a;
2220   for (i=0; i<m; i++) {
2221     row   = rstart + i;
2222     nz    = ii[i+1] - ii[i];
2223     cwork = jj;     jj += nz;
2224     vwork = aa;     aa += nz;
2225     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2226   }
2227 
2228   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2229   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2230   *newmat = M;
2231 
2232   /* save submatrix used in processor for next request */
2233   if (call ==  MAT_INITIAL_MATRIX) {
2234     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2235     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2236   }
2237 
2238   PetscFunctionReturn(0);
2239 }
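
/*
   A minimal usage sketch: each process requests its own rows, and every
   process lists the same global columns, so the result is a parallel copy
   of the selected rows and columns. Here rstart, rend, and N stand for this
   process's row ownership range and the global column count.

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,N,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... later, with the same index sets and layout ...
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/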
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2243 /*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in
   AIJ format (the default parallel PETSc format).  For good matrix assembly
   performance the user should preallocate the matrix storage by setting the
   parameters d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters
   accurately, performance can be increased by more than a factor of 50.
2249 
2250    Collective on MPI_Comm
2251 
2252    Input Parameters:
2253 +  A - the matrix
2254 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2255            (same value is used for all local rows)
2256 .  d_nnz - array containing the number of nonzeros in the various rows of the
2257            DIAGONAL portion of the local submatrix (possibly different for each row)
2258            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2259            The size of this array is equal to the number of local rows, i.e 'm'.
2260            You must leave room for the diagonal entry even if it is zero.
2261 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2262            submatrix (same value is used for all local rows).
2263 -  o_nnz - array containing the number of nonzeros in the various rows of the
2264            OFF-DIAGONAL portion of the local submatrix (possibly different for
2265            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2266            structure. The size of this array is equal to the number
2267            of local rows, i.e 'm'.
2268 
2269    The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
2271    storage.  That is, the stored row and column indices can begin at
2272    either one (as in Fortran) or zero.  See the users manual for details.
2273 
2274    The user MUST specify either the local or global matrix dimensions
2275    (possibly both).
2276 
2277    The parallel matrix is partitioned such that the first m0 rows belong to
2278    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the values of the input
   parameter 'm' on each process.
2280 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2287 
   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2289 
2290    By default, this format uses inodes (identical nodes) when possible.
2291    We search for consecutive rows with the same nonzero structure, thereby
2292    reusing matrix information to achieve increased efficiency.
2293 
2294    Options Database Keys:
2295 +  -mat_aij_no_inode  - Do not use inodes
2296 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2297 -  -mat_aij_oneindex - Internally use indexing starting at 1
2298         rather than 0.  Note that when calling MatSetValues(),
2299         the user still MUST index entries starting at 0!
2300 
2301    Example usage:
2302 
   Consider the following 8x8 matrix with 34 nonzero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:
2307 
2308 .vb
2309             1  2  0  |  0  3  0  |  0  4
2310     Proc0   0  5  6  |  7  0  0  |  8  0
2311             9  0 10  | 11  0  0  | 12  0
2312     -------------------------------------
2313            13  0 14  | 15 16 17  |  0  0
2314     Proc1   0 18  0  | 19 20 21  |  0  0
2315             0  0  0  | 22 23  0  | 24  0
2316     -------------------------------------
2317     Proc2  25 26 27  |  0  0 28  | 29  0
2318            30  0  0  | 31 32 33  |  0 34
2319 .ve
2320 
2321    This can be represented as a collection of submatrices as:
2322 
2323 .vb
2324       A B C
2325       D E F
2326       G H I
2327 .ve
2328 
   The submatrices A,B,C are owned by proc0, D,E,F are owned by proc1,
   and G,H,I are owned by proc2.
2331 
2332    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2333    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2334    The 'M','N' parameters are 8,8, and have the same values on all procs.
2335 
2336    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2337    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2338    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2342 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   over the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices,
   respectively. In this case, the values of d_nz,o_nz are:
2349 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
2353 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every processor. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
2358 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2361    In the above case the values for d_nnz,o_nnz are:
2362 .vb
2363      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2364      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2365      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2366 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
2369 
2370    Level: intermediate
2371 
2372 .keywords: matrix, aij, compressed row, sparse, parallel
2373 
2374 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2375 @*/
2376 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2377 {
2378   int ierr,(*f)(Mat,int,int*,int,int*);
2379 
2380   PetscFunctionBegin;
2381   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2382   if (f) {
2383     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2384   }
2385   PetscFunctionReturn(0);
2386 }
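
/*
   A minimal usage sketch, taking the d_nnz/o_nnz arrays from the 8x8
   example in the manual page above as seen by proc1 (which owns rows 3-5):

     Mat A;
     int d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/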
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatCreateMPIAIJ"
2390 /*@C
2391    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2392    (the default parallel PETSc format).  For good matrix assembly performance
2393    the user should preallocate the matrix storage by setting the parameters
2394    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2395    performance can be increased by more than a factor of 50.
2396 
2397    Collective on MPI_Comm
2398 
2399    Input Parameters:
2400 +  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is
       given). This value should be the same as the local size used in creating
       the x vector for the matrix-vector product y = Ax. For square matrices
       n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2409 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2410            (same value is used for all local rows)
2411 .  d_nnz - array containing the number of nonzeros in the various rows of the
2412            DIAGONAL portion of the local submatrix (possibly different for each row)
2413            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2414            The size of this array is equal to the number of local rows, i.e 'm'.
2415            You must leave room for the diagonal entry even if it is zero.
2416 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2417            submatrix (same value is used for all local rows).
2418 -  o_nnz - array containing the number of nonzeros in the various rows of the
2419            OFF-DIAGONAL portion of the local submatrix (possibly different for
2420            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2421            structure. The size of this array is equal to the number
2422            of local rows, i.e 'm'.
2423 
2424    Output Parameter:
2425 .  A - the matrix
2426 
2427    Notes:
2428    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2429    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2430    storage requirements for this matrix.
2431 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
2435 
2436    The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
2438    storage.  That is, the stored row and column indices can begin at
2439    either one (as in Fortran) or zero.  See the users manual for details.
2440 
2441    The user MUST specify either the local or global matrix dimensions
2442    (possibly both).
2443 
2444    The parallel matrix is partitioned such that the first m0 rows belong to
2445    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the values of the input
   parameter 'm' on each process.
2447 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2454 
   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2456 
2457    By default, this format uses inodes (identical nodes) when possible.
2458    We search for consecutive rows with the same nonzero structure, thereby
2459    reusing matrix information to achieve increased efficiency.
2460 
2461    Options Database Keys:
2462 +  -mat_aij_no_inode  - Do not use inodes
2463 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2464 -  -mat_aij_oneindex - Internally use indexing starting at 1
2465         rather than 0.  Note that when calling MatSetValues(),
2466         the user still MUST index entries starting at 0!
2467 
2468 
2469    Example usage:
2470 
   Consider the following 8x8 matrix with 34 nonzero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:
2475 
2476 .vb
2477             1  2  0  |  0  3  0  |  0  4
2478     Proc0   0  5  6  |  7  0  0  |  8  0
2479             9  0 10  | 11  0  0  | 12  0
2480     -------------------------------------
2481            13  0 14  | 15 16 17  |  0  0
2482     Proc1   0 18  0  | 19 20 21  |  0  0
2483             0  0  0  | 22 23  0  | 24  0
2484     -------------------------------------
2485     Proc2  25 26 27  |  0  0 28  | 29  0
2486            30  0  0  | 31 32 33  |  0 34
2487 .ve
2488 
2489    This can be represented as a collection of submatrices as:
2490 
2491 .vb
2492       A B C
2493       D E F
2494       G H I
2495 .ve
2496 
   The submatrices A,B,C are owned by proc0, D,E,F are owned by proc1,
   and G,H,I are owned by proc2.
2499 
2500    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2501    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2502    The 'M','N' parameters are 8,8, and have the same values on all procs.
2503 
2504    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2505    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2506    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2510 
   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   over the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices,
   respectively. In this case, the values of d_nz,o_nz are:
2517 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
2521 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every processor. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
2526 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2529    In the above case the values for d_nnz,o_nnz are:
2530 .vb
2531      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2532      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2533      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2534 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
2537 
2538    Level: intermediate
2539 
2540 .keywords: matrix, aij, compressed row, sparse, parallel
2541 
2542 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2543 @*/
2544 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2545 {
2546   int ierr,size;
2547 
2548   PetscFunctionBegin;
2549   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2550   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2551   if (size > 1) {
2552     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2553     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2554   } else {
2555     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2556     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2557   }
2558   PetscFunctionReturn(0);
2559 }
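
/*
   A minimal usage sketch: create a parallel AIJ matrix of global size MxN
   with (illustrative) estimates of 5 nonzeros per row in the diagonal block
   and 2 in the off-diagonal block, letting PETSc choose the distribution:

     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ... MatSetValues() calls ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/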
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2563 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2564 {
2565   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2566   PetscFunctionBegin;
2567   *Ad     = a->A;
2568   *Ao     = a->B;
2569   *colmap = a->garray;
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 #undef __FUNCT__
2574 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2575 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2576 {
2577   int        ierr,i;
2578   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2579 
2580   PetscFunctionBegin;
2581   if (coloring->ctype == IS_COLORING_LOCAL) {
2582     ISColoringValue *allcolors,*colors;
2583     ISColoring      ocoloring;
2584 
2585     /* set coloring for diagonal portion */
2586     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2587 
2588     /* set coloring for off-diagonal portion */
2589     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2590     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2591     for (i=0; i<a->B->n; i++) {
2592       colors[i] = allcolors[a->garray[i]];
2593     }
2594     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2595     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2596     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2597     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2598   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2599     ISColoringValue *colors;
2600     int             *larray;
2601     ISColoring      ocoloring;
2602 
2603     /* set coloring for diagonal portion */
2604     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2605     for (i=0; i<a->A->n; i++) {
2606       larray[i] = i + a->cstart;
2607     }
2608     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2609     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2610     for (i=0; i<a->A->n; i++) {
2611       colors[i] = coloring->colors[larray[i]];
2612     }
2613     ierr = PetscFree(larray);CHKERRQ(ierr);
2614     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2615     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2616     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2617 
2618     /* set coloring for off-diagonal portion */
2619     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2620     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2621     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2622     for (i=0; i<a->B->n; i++) {
2623       colors[i] = coloring->colors[larray[i]];
2624     }
2625     ierr = PetscFree(larray);CHKERRQ(ierr);
2626     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2627     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2628     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2629   } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2631   }
2632 
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 #undef __FUNCT__
2637 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2638 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2639 {
2640   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2641   int        ierr;
2642 
2643   PetscFunctionBegin;
2644   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2645   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 #undef __FUNCT__
2650 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2651 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2652 {
2653   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2654   int        ierr;
2655 
2656   PetscFunctionBegin;
2657   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2658   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatMerge"
2664 /*@C
      MatMerge - Creates a single large PETSc matrix by concatenating the
                 sequential matrices from each processor
2667 
2668     Collective on MPI_Comm
2669 
2670    Input Parameters:
+    comm - the communicator the parallel matrix will live on
2672 -    inmat - the input sequential matrices
2673 
2674    Output Parameter:
2675 .    outmat - the parallel matrix generated
2676 
2677     Level: advanced
2678 
   Notes: The number of columns of EACH of the separate input matrices
      MUST be the same. The input matrix inmat is destroyed by this routine.
2681 
2682 @*/
2683 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2684 {
2685   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2686   PetscScalar *values;
2687   PetscMap    columnmap,rowmap;
2688 
2689   PetscFunctionBegin;
2690 
2691   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2692 
2693   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2694   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2695   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2696   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2697   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2698   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2699 
2700   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2701   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2702   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2703   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2704   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2705 
2706   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2707   for (i=0;i<m;i++) {
2708     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2709     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2710     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2711   }
2712   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2713   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2714 
2715   for (i=0;i<m;i++) {
2716     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2717     I    = i + rstart;
2718     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2719     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2720   }
2721   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2722   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2723   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2724 
2725   PetscFunctionReturn(0);
2726 }
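
/*
   A minimal usage sketch: each process assembles a sequential matrix
   holding its rows (all with the same global number of columns N) and the
   pieces are stacked into one parallel matrix. Note that MatMerge()
   destroys the input matrix.

     Mat local,global;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&local);CHKERRQ(ierr);
     ... assemble local ...
     ierr = MatMerge(PETSC_COMM_WORLD,local,&global);CHKERRQ(ierr);
*/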
2727 
2728 #undef __FUNCT__
2729 #define __FUNCT__ "MatFileSplit"
2730 int MatFileSplit(Mat A,char *outfile)
2731 {
2732   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2733   PetscViewer out;
2734   char        *name;
2735   Mat         B;
2736   PetscScalar *values;
2737 
2738   PetscFunctionBegin;
2739 
2740   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2741   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2742   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2743   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2744   for (i=0;i<m;i++) {
2745     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2746     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2747     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2748   }
2749   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2750   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2751 
2752   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2753   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2754   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2755   sprintf(name,"%s.%d",outfile,rank);
2756   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
2758   ierr = MatView(B,out);CHKERRQ(ierr);
2759   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2760   ierr = MatDestroy(B);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
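
/*
   A minimal usage sketch for the splitting utility above: each process
   writes its local rows of A to its own sequential binary file, named
   outfile.<rank>:

     ierr = MatFileSplit(A,"mymatrix");CHKERRQ(ierr);
*/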
2763