/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
at a slightly higher hash-table lookup cost; without it, it is not scalable
(each process stores an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
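
/*
   Illustration only (not part of the library source): how the colmap built
   above is meant to be consulted.  Both branches store local+1 so that 0 can
   mean "not present"; a lookup of a hypothetical global column gcol is

      int lcol;
   #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;                          <-- -1 now means "not in B"
   #else
      lcol = aij->colmap[gcol] - 1;    <-- -1 now means "not in B"
   #endif

   which is exactly the pattern used by MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below.
*/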

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
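
/*
   The two macros above are hand-inlined versions of MatSetValues_SeqAIJ()
   for a single (row,col,value) triple, specialized to the diagonal (A) and
   off-diagonal (B) blocks.  Each is equivalent to the call it replaces
   (kept, commented out, in MatSetValues_MPIAIJ() below):

      ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr);

   i.e. binary-search the sorted row for the column; add or insert in place
   if it is found; otherwise shift the tail of the row up one slot, growing
   the row's storage by CHUNKSIZE entries when it is full.
*/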

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift=0;
  PetscScalar  *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
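
/*
   Caller-side sketch (illustration only): the row used below is locally
   owned, so the value goes directly into the A or B block through the macros
   above; a row outside [rstart,rend) would instead be stashed and
   communicated at assembly time.

      int         rstart,rend,col = 0;
      PetscScalar val = 1.0;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      ierr = MatSetValues(mat,1,&rstart,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/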

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
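
/*
   The begin/end pair above is always used as a bracket; a minimal
   caller-side sketch (stashed off-process values are exchanged between the
   two calls, so independent computation can be overlapped with the
   communication):

      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ... unrelated local work may be done here ...
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MAT_FLUSH_ASSEMBLY is used instead when switching between ADD_VALUES and
   INSERT_VALUES before the final assembly.
*/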

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and thus trigger lots of mallocs,
     slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
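
/*
   Caller-side sketch for the routine above (illustration only; assumes the
   PETSc 2.x calling sequence used in this file, where the rows are given as
   an IS of global indices and diag, if non-null, is placed on the diagonal
   of each zeroed row):

      IS          is;
      int         rows[1] = {0};
      PetscScalar one = 1.0;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,1,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/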

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
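
/*
   Note the communication/computation overlap above: the scatter of the
   needed off-process entries of xx into a->lvec is started, the diagonal
   block is applied while the messages are in flight, and only then is the
   scatter completed and the off-diagonal contribution added in.  A
   caller-side sketch, assuming vectors whose layouts conform to the matrix
   (comm is the matrix communicator):

      Vec x,y;
      int m,n;
      ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
      ierr = VecCreateMPI(comm,n,PETSC_DETERMINE,&x);CHKERRQ(ierr);
      ierr = VecCreateMPI(comm,m,PETSC_DETERMINE,&y);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/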

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for this          */
  /* implementation, but perhaps not for all implementations             */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsSymmetric_MPIAIJ"
int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
{
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS         Me,Notme;
  int        M,N,first,last,*notme,i,ierr;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsSymmetric(Adia,Bdia,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsSymmetric(Aoff,Boff,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this          */
  /* implementation, but perhaps not for all implementations             */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
extern int MatFactorInfo_Spooles(Mat,PetscViewer);
extern int MatFactorInfo_MUMPS(Mat,PetscViewer);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ  *B = (Mat_SeqAIJ*)aij->B->data;
  int         nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int         nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
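
/*
   Caller-side sketch for exercising the binary pathway above (illustration
   only; assumes the PETSc 2.x PetscViewerBinaryOpen() interface, and the
   file name is arbitrary):

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(mat->comm,"matrix.dat",PETSC_BINARY_CREATE,&viewer);CHKERRQ(ierr);
      ierr = MatView(mat,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/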

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  int               ierr,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  int          ierr;
  Vec          bb1;
  PetscScalar  mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}
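
/*
   Caller-side sketch: reducing the storage statistics assembled above over
   the whole communicator (illustration only):

      MatInfo info;
      ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(mat->comm,"nz used %d allocated %d mallocs %d\n",
                         (int)info.nz_used,(int)info.nz_allocated,(int)info.mallocs);CHKERRQ(ierr);
*/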

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int          *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
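
/*
   Caller-side sketch: only locally owned rows may be queried, and each
   MatGetRow() must be matched by a MatRestoreRow() before the next call,
   since the routine above reuses a single scratch buffer (rowvalues):

      int         rstart,rend,row,nz,*cols;
      PetscScalar *vals;
      ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
        ... use cols[0..nz-1] and vals[0..nz-1] here ...
        ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      }
*/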

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int          ierr,i,j,cstart = aij->cstart;
  PetscReal    sum = 0.0;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatTranspose_MPIAIJ"
1379 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1380 {
1381   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1382   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1383   int          ierr;
1384   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1385   Mat          B;
1386   PetscScalar  *array;
1387 
1388   PetscFunctionBegin;
1389   if (!matout && M != N) {
1390     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1391   }
1392 
1393   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1394 
1395   /* copy over the A part */
1396   Aloc = (Mat_SeqAIJ*)a->A->data;
1397   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1398   row = a->rstart;
1399   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
1400   for (i=0; i<m; i++) {
1401     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1402     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1403   }
1404   aj = Aloc->j;
1405   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}
1406 
1407   /* copy over the B part */
1408   Aloc = (Mat_SeqAIJ*)a->B->data;
1409   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1410   row  = a->rstart;
1411   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1412   ct   = cols;
1413   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1414   for (i=0; i<m; i++) {
1415     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1416     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1417   }
1418   ierr = PetscFree(ct);CHKERRQ(ierr);
1419   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1420   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1421   if (matout) {
1422     *matout = B;
1423   } else {
1424     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1431 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1432 {
1433   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1434   Mat        a = aij->A,b = aij->B;
1435   int        ierr,s1,s2,s3;
1436 
1437   PetscFunctionBegin;
1438   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1439   if (rr) {
1440     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1441     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1442     /* Overlap communication with computation. */
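    /* Begin scattering rr into the ghosted vector lvec now; the diagonal block
       is scaled below while the messages are in flight, and the scatter is
       completed just before the off-diagonal block needs the ghost values. */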
1443     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1444   }
1445   if (ll) {
1446     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1447     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1448     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1449   }
1450   /* scale  the diagonal block */
1451   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1452 
1453   if (rr) {
1454     /* Do a scatter end and then right scale the off-diagonal block */
1455     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1456     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1457   }
1458 
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1465 int MatPrintHelp_MPIAIJ(Mat A)
1466 {
1467   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1468   int        ierr;
1469 
1470   PetscFunctionBegin;
1471   if (!a->rank) {
1472     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1473   }
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1479 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1480 {
1481   PetscFunctionBegin;
1482   *bs = 1;
1483   PetscFunctionReturn(0);
1484 }
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1487 int MatSetUnfactored_MPIAIJ(Mat A)
1488 {
1489   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1490   int        ierr;
1491 
1492   PetscFunctionBegin;
1493   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatEqual_MPIAIJ"
1499 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1500 {
1501   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1502   Mat        a,b,c,d;
1503   PetscTruth flg;
1504   int        ierr;
1505 
1506   PetscFunctionBegin;
1507   a = matA->A; b = matA->B;
1508   c = matB->A; d = matB->B;
1509 
1510   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1511   if (flg == PETSC_TRUE) {
1512     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1513   }
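  /* every process must agree: a logical AND across the communicator makes the
     result PETSC_FALSE if any process found its local blocks unequal */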
1514   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatCopy_MPIAIJ"
1520 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1521 {
1522   int        ierr;
1523   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1524   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1525 
1526   PetscFunctionBegin;
1527   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1528   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1529     /* because of the column compression in the off-processor part of the matrix a->B,
1530        the number of columns in a->B and b->B may be different, hence we cannot call
1531        MatCopy() directly on the two parts. If need be, we could provide a copy more
1532        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1533        then copying the submatrices */
1534     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1535   } else {
1536     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1537     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1538   }
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1544 int MatSetUpPreallocation_MPIAIJ(Mat A)
1545 {
1546   int        ierr;
1547 
1548   PetscFunctionBegin;
1549   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1554 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1555 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1556 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
1557 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1558 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1559 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1560 #endif
1561 
1562 #include "petscblaslapack.h"
1563 extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure);
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatAXPY_MPIAIJ"
1566 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1567 {
1568   int        ierr,one=1,i;
1569   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1570   Mat_SeqAIJ *x,*y;
1571 
1572   PetscFunctionBegin;
1573   if (str == SAME_NONZERO_PATTERN) {
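    /* identical nonzero patterns: the stored value arrays x->a and y->a align
       entry-for-entry, so Y += a*X is a single BLAS axpy on each block */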
1574     x = (Mat_SeqAIJ *)xx->A->data;
1575     y = (Mat_SeqAIJ *)yy->A->data;
1576     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1577     x = (Mat_SeqAIJ *)xx->B->data;
1578     y = (Mat_SeqAIJ *)yy->B->data;
1579     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1580   } else if (str == SUBSET_NONZERO_PATTERN) {
1581     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1582 
1583     x = (Mat_SeqAIJ *)xx->B->data;
1584     y = (Mat_SeqAIJ *)yy->B->data;
1585     if (y->xtoy && y->XtoY != xx->B) {
1586       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1587       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1588     }
1589     if (!y->xtoy) { /* get xtoy */
1590       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1591       y->XtoY = xx->B;
1592     }
1593     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1594   } else {
1595     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 /* -------------------------------------------------------------------*/
1601 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1602        MatGetRow_MPIAIJ,
1603        MatRestoreRow_MPIAIJ,
1604        MatMult_MPIAIJ,
1605        MatMultAdd_MPIAIJ,
1606        MatMultTranspose_MPIAIJ,
1607        MatMultTransposeAdd_MPIAIJ,
1608        0,
1609        0,
1610        0,
1611        0,
1612        0,
1613        0,
1614        MatRelax_MPIAIJ,
1615        MatTranspose_MPIAIJ,
1616        MatGetInfo_MPIAIJ,
1617        MatEqual_MPIAIJ,
1618        MatGetDiagonal_MPIAIJ,
1619        MatDiagonalScale_MPIAIJ,
1620        MatNorm_MPIAIJ,
1621        MatAssemblyBegin_MPIAIJ,
1622        MatAssemblyEnd_MPIAIJ,
1623        0,
1624        MatSetOption_MPIAIJ,
1625        MatZeroEntries_MPIAIJ,
1626        MatZeroRows_MPIAIJ,
1627 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1628        MatLUFactorSymbolic_MPIAIJ_TFS,
1629 #else
1630        0,
1631 #endif
1632        0,
1633        0,
1634        0,
1635        MatSetUpPreallocation_MPIAIJ,
1636        0,
1637        0,
1638        0,
1639        0,
1640        MatDuplicate_MPIAIJ,
1641        0,
1642        0,
1643        0,
1644        0,
1645        MatAXPY_MPIAIJ,
1646        MatGetSubMatrices_MPIAIJ,
1647        MatIncreaseOverlap_MPIAIJ,
1648        MatGetValues_MPIAIJ,
1649        MatCopy_MPIAIJ,
1650        MatPrintHelp_MPIAIJ,
1651        MatScale_MPIAIJ,
1652        0,
1653        0,
1654        0,
1655        MatGetBlockSize_MPIAIJ,
1656        0,
1657        0,
1658        0,
1659        0,
1660        MatFDColoringCreate_MPIAIJ,
1661        0,
1662        MatSetUnfactored_MPIAIJ,
1663        0,
1664        0,
1665        MatGetSubMatrix_MPIAIJ,
1666        MatDestroy_MPIAIJ,
1667        MatView_MPIAIJ,
1668        MatGetPetscMaps_Petsc,
1669        0,
1670        0,
1671        0,
1672        0,
1673        0,
1674        0,
1675        0,
1676        0,
1677        MatSetColoring_MPIAIJ,
1678        MatSetValuesAdic_MPIAIJ,
1679        MatSetValuesAdifor_MPIAIJ
1680 };
1681 
1682 /* ----------------------------------------------------------------------------------------*/
1683 
1684 EXTERN_C_BEGIN
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1687 int MatStoreValues_MPIAIJ(Mat mat)
1688 {
1689   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1690   int        ierr;
1691 
1692   PetscFunctionBegin;
1693   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1694   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 EXTERN_C_END
1698 
1699 EXTERN_C_BEGIN
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1702 int MatRetrieveValues_MPIAIJ(Mat mat)
1703 {
1704   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1705   int        ierr;
1706 
1707   PetscFunctionBegin;
1708   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1709   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 EXTERN_C_END
1713 
1714 #include "petscpc.h"
1715 EXTERN_C_BEGIN
1716 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1717 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec);
1718 EXTERN_C_END
1719 
1720 EXTERN_C_BEGIN
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1723 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1724 {
1725   Mat_MPIAIJ   *b;
1726   int          ierr,i;
1727 
1728   PetscFunctionBegin;
1729   B->preallocated = PETSC_TRUE;
1730   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1731   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1732   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1733   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1734   if (d_nnz) {
1735     for (i=0; i<B->m; i++) {
1736       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1737     }
1738   }
1739   if (o_nnz) {
1740     for (i=0; i<B->m; i++) {
1741       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1742     }
1743   }
1744   b = (Mat_MPIAIJ*)B->data;
1745 
1746   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1747   PetscLogObjectParent(B,b->A);
1748   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1749   PetscLogObjectParent(B,b->B);
1750 
1751   PetscFunctionReturn(0);
1752 }
1753 EXTERN_C_END
1754 
1755 EXTERN_C_BEGIN
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatCreate_MPIAIJ"
1758 int MatCreate_MPIAIJ(Mat B)
1759 {
1760   Mat_MPIAIJ *b;
1761   int        ierr,i,size;
1762 
1763   PetscFunctionBegin;
1764   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1765 
1766   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1767   B->data         = (void*)b;
1768   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1769   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1770   B->factor       = 0;
1771   B->assembled    = PETSC_FALSE;
1772   B->mapping      = 0;
1773 
1774   B->insertmode      = NOT_SET_VALUES;
1775   b->size            = size;
1776   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1777 
1778   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1779   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1780 
1781   /* the information in the maps duplicates the information computed below, eventually
1782      we should remove the duplicate information that is not contained in the maps */
1783   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1784   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1785 
1786   /* build local table of row and column ownerships */
1787   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1788   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1789   b->cowners = b->rowners + b->size + 2;
1790   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1791   b->rowners[0] = 0;
1792   for (i=2; i<=b->size; i++) {
1793     b->rowners[i] += b->rowners[i-1];
1794   }
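  /* e.g., with per-process row counts {3,3,2} the allgather/prefix-sum above
     yields rowners = {0,3,6,8}; this process owns rows [rstart,rend) */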
1795   b->rstart = b->rowners[b->rank];
1796   b->rend   = b->rowners[b->rank+1];
1797   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1798   b->cowners[0] = 0;
1799   for (i=2; i<=b->size; i++) {
1800     b->cowners[i] += b->cowners[i-1];
1801   }
1802   b->cstart = b->cowners[b->rank];
1803   b->cend   = b->cowners[b->rank+1];
1804 
1805   /* build cache for off array entries formed */
1806   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1807   b->donotstash  = PETSC_FALSE;
1808   b->colmap      = 0;
1809   b->garray      = 0;
1810   b->roworiented = PETSC_TRUE;
1811 
1812   /* stuff used for matrix vector multiply */
1813   b->lvec      = PETSC_NULL;
1814   b->Mvctx     = PETSC_NULL;
1815 
1816   /* stuff for MatGetRow() */
1817   b->rowindices   = 0;
1818   b->rowvalues    = 0;
1819   b->getrowactive = PETSC_FALSE;
1820 
1821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1822                                      "MatStoreValues_MPIAIJ",
1823                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1825                                      "MatRetrieveValues_MPIAIJ",
1826                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1828 				     "MatGetDiagonalBlock_MPIAIJ",
1829                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1831 				     "MatIsSymmetric_MPIAIJ",
1832 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1834 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1835 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1837 				     "MatDiagonalScaleLocal_MPIAIJ",
1838 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 EXTERN_C_END
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1845 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1846 {
1847   Mat        mat;
1848   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1849   int        ierr;
1850 
1851   PetscFunctionBegin;
1852   *newmat       = 0;
1853   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1854   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1855   a    = (Mat_MPIAIJ*)mat->data;
1856   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1857   mat->factor       = matin->factor;
1858   mat->assembled    = PETSC_TRUE;
1859   mat->insertmode   = NOT_SET_VALUES;
1860   mat->preallocated = PETSC_TRUE;
1861 
1862   a->rstart       = oldmat->rstart;
1863   a->rend         = oldmat->rend;
1864   a->cstart       = oldmat->cstart;
1865   a->cend         = oldmat->cend;
1866   a->size         = oldmat->size;
1867   a->rank         = oldmat->rank;
1868   a->donotstash   = oldmat->donotstash;
1869   a->roworiented  = oldmat->roworiented;
1870   a->rowindices   = 0;
1871   a->rowvalues    = 0;
1872   a->getrowactive = PETSC_FALSE;
1873 
1874   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1875   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1876   if (oldmat->colmap) {
1877 #if defined (PETSC_USE_CTABLE)
1878     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1879 #else
1880     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1881     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1882     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1883 #endif
1884   } else a->colmap = 0;
1885   if (oldmat->garray) {
1886     int len;
1887     len  = oldmat->B->n;
1888     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1889     PetscLogObjectMemory(mat,len*sizeof(int));
1890     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1891   } else a->garray = 0;
1892 
1893   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1894   PetscLogObjectParent(mat,a->lvec);
1895   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1896   PetscLogObjectParent(mat,a->Mvctx);
1897   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1898   PetscLogObjectParent(mat,a->A);
1899   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1900   PetscLogObjectParent(mat,a->B);
1901   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1902   *newmat = mat;
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #include "petscsys.h"
1907 
1908 EXTERN_C_BEGIN
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatLoad_MPIAIJ"
1911 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1912 {
1913   Mat          A;
1914   PetscScalar  *vals,*svals;
1915   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1916   MPI_Status   status;
1917   int          i,nz,ierr,j,rstart,rend,fd;
1918   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1919   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1920   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1921 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
1922   PetscTruth   flag;
1923 #endif
1924 
1925   PetscFunctionBegin;
1926   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1927   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1928   if (!rank) {
1929     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1930     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1931     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
1932     if (header[3] < 0) {
1933       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1934     }
1935   }
1936 
1937   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1938   M = header[1]; N = header[2];
1939   /* determine ownership of all rows */
1940   m = M/size + ((M % size) > rank);
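  /* e.g., M=10 on size=4 processes gives m = 3,3,2,2: the first M%size ranks
     each take one extra row */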
1941   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1942   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1943   rowners[0] = 0;
1944   for (i=2; i<=size; i++) {
1945     rowners[i] += rowners[i-1];
1946   }
1947   rstart = rowners[rank];
1948   rend   = rowners[rank+1];
1949 
1950   /* distribute row lengths to all processors */
1951   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1952   offlens = ourlens + (rend-rstart);
1953   if (!rank) {
1954     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1955     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1956     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1957     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1958     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1959     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1960   } else {
1961     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1962   }
1963 
1964   if (!rank) {
1965     /* calculate the number of nonzeros on each processor */
1966     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1967     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1968     for (i=0; i<size; i++) {
1969       for (j=rowners[i]; j< rowners[i+1]; j++) {
1970         procsnz[i] += rowlengths[j];
1971       }
1972     }
1973     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1974 
1975     /* determine max buffer needed and allocate it */
1976     maxnz = 0;
1977     for (i=0; i<size; i++) {
1978       maxnz = PetscMax(maxnz,procsnz[i]);
1979     }
1980     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1981 
1982     /* read in my part of the matrix column indices  */
1983     nz   = procsnz[0];
1984     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1985     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1986 
1987     /* read in everyone else's parts and ship them off */
1988     for (i=1; i<size; i++) {
1989       nz   = procsnz[i];
1990       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1991       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1992     }
1993     ierr = PetscFree(cols);CHKERRQ(ierr);
1994   } else {
1995     /* determine buffer space needed for message */
1996     nz = 0;
1997     for (i=0; i<m; i++) {
1998       nz += ourlens[i];
1999     }
2000     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2001 
2002     /* receive message of column indices */
2003     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2004     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2005     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2006   }
2007 
2008   /* determine column ownership if matrix is not square */
2009   if (N != M) {
2010     n      = N/size + ((N % size) > rank);
2011     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2012     cstart = cend - n;
2013   } else {
2014     cstart = rstart;
2015     cend   = rend;
2016     n      = cend - cstart;
2017   }
2018 
2019   /* loop over local rows, determining number of off diagonal entries */
2020   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2021   jj = 0;
2022   for (i=0; i<m; i++) {
2023     for (j=0; j<ourlens[i]; j++) {
2024       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2025       jj++;
2026     }
2027   }
2028 
2029   /* create our matrix */
2030   for (i=0; i<m; i++) {
2031     ourlens[i] -= offlens[i];
2032   }
2033   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2034   A = *newmat;
2035   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2036   for (i=0; i<m; i++) {
2037     ourlens[i] += offlens[i];
2038   }
2039 
2040   if (!rank) {
2041     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2042 
2043     /* read in my part of the matrix numerical values  */
2044     nz   = procsnz[0];
2045     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2046 
2047     /* insert into matrix */
2048     jj      = rstart;
2049     smycols = mycols;
2050     svals   = vals;
2051     for (i=0; i<m; i++) {
2052       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2053       smycols += ourlens[i];
2054       svals   += ourlens[i];
2055       jj++;
2056     }
2057 
2058     /* read in other processors and ship out */
2059     for (i=1; i<size; i++) {
2060       nz   = procsnz[i];
2061       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2062       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2063     }
2064     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2065   } else {
2066     /* receive numeric values */
2067     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2068 
2069     /* receive message of values */
2070     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2071     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2072     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2073 
2074     /* insert into matrix */
2075     jj      = rstart;
2076     smycols = mycols;
2077     svals   = vals;
2078     for (i=0; i<m; i++) {
2079       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2080       smycols += ourlens[i];
2081       svals   += ourlens[i];
2082       jj++;
2083     }
2084   }
2085   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2086   ierr = PetscFree(vals);CHKERRQ(ierr);
2087   ierr = PetscFree(mycols);CHKERRQ(ierr);
2088   ierr = PetscFree(rowners);CHKERRQ(ierr);
2089 
2090   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2091   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2092 #if defined(PETSC_HAVE_SPOOLES)
2093   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2094   if (flag) {
2095     if (size == 1) {
2096       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2097     } else {
2098       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2099     }
2100   }
2101 #endif
2102 #if defined(PETSC_HAVE_SUPERLUDIST)
2103   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2104   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2105 #endif
2106 #if defined(PETSC_HAVE_MUMPS)
2107   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
2108   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
2109 #endif
2110   PetscFunctionReturn(0);
2111 }
2112 EXTERN_C_END
2113 
2114 #undef __FUNCT__
2115 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2116 /*
2117     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2118   copy, and then the end result formed by concatenating the local matrices.
2119   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2120 */
2121 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2122 {
2123   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2124   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2125   Mat          *local,M,Mreuse;
2126   PetscScalar  *vwork,*aa;
2127   MPI_Comm     comm = mat->comm;
2128   Mat_SeqAIJ   *aij;
2129 
2130 
2131   PetscFunctionBegin;
2132   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2133   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2134 
2135   if (call ==  MAT_REUSE_MATRIX) {
2136     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2137     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2138     local = &Mreuse;
2139     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2140   } else {
2141     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2142     Mreuse = *local;
2143     ierr   = PetscFree(local);CHKERRQ(ierr);
2144   }
2145 
2146   /*
2147       m - number of local rows
2148       n - number of columns (same on all processors)
2149       rstart - first row in new global matrix generated
2150   */
2151   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2152   if (call == MAT_INITIAL_MATRIX) {
2153     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2154     ii  = aij->i;
2155     jj  = aij->j;
2156 
2157     /*
2158         Determine the number of non-zeros in the diagonal and off-diagonal
2159         portions of the matrix in order to do correct preallocation
2160     */
2161 
2162     /* first get start and end of "diagonal" columns */
2163     if (csize == PETSC_DECIDE) {
2164       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2165       if (mglobal == n) { /* square matrix */
2166         nlocal = m;
2167       } else {
2168         nlocal = n/size + ((n % size) > rank);
2169       }
2170     } else {
2171       nlocal = csize;
2172     }
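    /* e.g., n=8 columns on size=3 processes gives nlocal = 3,3,2; the MPI_Scan
       below converts these counts into this process's column range [rstart,rend) */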
2173     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2174     rstart = rend - nlocal;
2175     if (rank == size - 1 && rend != n) {
2176       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2177     }
2178 
2179     /* next, compute all the lengths */
2180     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2181     olens = dlens + m;
2182     for (i=0; i<m; i++) {
2183       jend = ii[i+1] - ii[i];
2184       olen = 0;
2185       dlen = 0;
2186       for (j=0; j<jend; j++) {
2187         if (*jj < rstart || *jj >= rend) olen++;
2188         else dlen++;
2189         jj++;
2190       }
2191       olens[i] = olen;
2192       dlens[i] = dlen;
2193     }
2194     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2195     ierr = PetscFree(dlens);CHKERRQ(ierr);
2196   } else {
2197     int ml,nl;
2198 
2199     M = *newmat;
2200     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2201     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2202     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2203     /*
2204          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2205        rather than the slower MatSetValues().
2206     */
2207     M->was_assembled = PETSC_TRUE;
2208     M->assembled     = PETSC_FALSE;
2209   }
2210   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2211   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2212   ii  = aij->i;
2213   jj  = aij->j;
2214   aa  = aij->a;
2215   for (i=0; i<m; i++) {
2216     row   = rstart + i;
2217     nz    = ii[i+1] - ii[i];
2218     cwork = jj;     jj += nz;
2219     vwork = aa;     aa += nz;
2220     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2221   }
2222 
2223   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2224   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2225   *newmat = M;
2226 
2227   /* save submatrix used in processor for next request */
2228   if (call ==  MAT_INITIAL_MATRIX) {
2229     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2230     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2231   }
2232 
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2238 /*@C
2239    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2240    (the default parallel PETSc format).  For good matrix assembly performance
2241    the user should preallocate the matrix storage by setting the parameters
2242    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2243    performance can be increased by more than a factor of 50.
2244 
2245    Collective on MPI_Comm
2246 
2247    Input Parameters:
2248 +  A - the matrix
2249 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2250            (same value is used for all local rows)
2251 .  d_nnz - array containing the number of nonzeros in the various rows of the
2252            DIAGONAL portion of the local submatrix (possibly different for each row)
2253            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2254            The size of this array is equal to the number of local rows, i.e., 'm'.
2255            You must leave room for the diagonal entry even if it is zero.
2256 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2257            submatrix (same value is used for all local rows).
2258 -  o_nnz - array containing the number of nonzeros in the various rows of the
2259            OFF-DIAGONAL portion of the local submatrix (possibly different for
2260            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2261            structure. The size of this array is equal to the number
2262            of local rows, i.e., 'm'.
2263 
2264    The AIJ format (also called the Yale sparse matrix format or
2265    compressed row storage), is fully compatible with standard Fortran 77
2266    storage.  That is, the stored row and column indices can begin at
2267    either one (as in Fortran) or zero.  See the users manual for details.
2268 
2269    The user MUST specify either the local or global matrix dimensions
2270    (possibly both).
2271 
2272    The parallel matrix is partitioned such that the first m0 rows belong to
2273    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2274    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2275 
2276    The DIAGONAL portion of the local submatrix of a processor can be defined
2277    as the submatrix which is obtained by extracting the part corresponding
2278    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2279    first row that belongs to the processor, and r2 is the last row belonging
2280    to this processor. This is a square mxm matrix. The remaining portion
2281    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2282 
2283    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2284 
2285    By default, this format uses inodes (identical nodes) when possible.
2286    We search for consecutive rows with the same nonzero structure, thereby
2287    reusing matrix information to achieve increased efficiency.
2288 
2289    Options Database Keys:
2290 +  -mat_aij_no_inode  - Do not use inodes
2291 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2292 -  -mat_aij_oneindex - Internally use indexing starting at 1
2293         rather than 0.  Note that when calling MatSetValues(),
2294         the user still MUST index entries starting at 0!
2295 
2296    Example usage:
2297 
2298    Consider the following 8x8 matrix with 34 non-zero values, that is
2299    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2300    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2301    as follows:
2302 
2303 .vb
2304             1  2  0  |  0  3  0  |  0  4
2305     Proc0   0  5  6  |  7  0  0  |  8  0
2306             9  0 10  | 11  0  0  | 12  0
2307     -------------------------------------
2308            13  0 14  | 15 16 17  |  0  0
2309     Proc1   0 18  0  | 19 20 21  |  0  0
2310             0  0  0  | 22 23  0  | 24  0
2311     -------------------------------------
2312     Proc2  25 26 27  |  0  0 28  | 29  0
2313            30  0  0  | 31 32 33  |  0 34
2314 .ve
2315 
2316    This can be represented as a collection of submatrices as:
2317 
2318 .vb
2319       A B C
2320       D E F
2321       G H I
2322 .ve
2323 
2324    Where the submatrices A,B,C are owned by proc0, D,E,F are
2325    owned by proc1, G,H,I are owned by proc2.
2326 
2327    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2328    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2329    The 'M','N' parameters are 8,8, and have the same values on all procs.
2330 
2331    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2332    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2333    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2334    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2335    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2336    matrix, and [DF] as another SeqAIJ matrix.
2337 
2338    When d_nz, o_nz parameters are specified, d_nz storage elements are
2339    allocated for every row of the local diagonal submatrix, and o_nz
2340    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2341    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2342    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2343    In this case, the values of d_nz,o_nz are:
2344 .vb
2345      proc0 : d_nz = 2, o_nz = 2
2346      proc1 : d_nz = 3, o_nz = 2
2347      proc2 : d_nz = 1, o_nz = 4
2348 .ve
2349    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2350    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2351    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2352    34 values.
2353 
2354    When d_nnz, o_nnz parameters are specified, the storage is specified
2355    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2356    In the above case the values for d_nnz,o_nnz are:
2357 .vb
2358      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2359      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2360      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2361 .ve
2362    Here the space allocated is the sum of all the above values, i.e., 34, and
2363    hence preallocation is perfect.
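
   A minimal usage sketch for proc0 in the example above (the communicator comm
   and the surrounding error-checking style are assumptions for illustration):

.vb
     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* proc0's values from the example */
     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve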
2364 
2365    Level: intermediate
2366 
2367 .keywords: matrix, aij, compressed row, sparse, parallel
2368 
2369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2370 @*/
2371 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2372 {
2373   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2374 
2375   PetscFunctionBegin;
2376   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2377   if (f) {
2378     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2379   }
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatCreateMPIAIJ"
2385 /*@C
2386    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2387    (the default parallel PETSc format).  For good matrix assembly performance
2388    the user should preallocate the matrix storage by setting the parameters
2389    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2390    performance can be increased by more than a factor of 50.
2391 
2392    Collective on MPI_Comm
2393 
2394    Input Parameters:
2395 +  comm - MPI communicator
2396 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2397            This value should be the same as the local size used in creating the
2398            y vector for the matrix-vector product y = Ax.
2399 .  n - This value should be the same as the local size used in creating the
2400        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2401        calculated if N is given) For square matrices n is almost always m.
2402 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2403 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2404 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2405            (same value is used for all local rows)
2406 .  d_nnz - array containing the number of nonzeros in the various rows of the
2407            DIAGONAL portion of the local submatrix (possibly different for each row)
2408            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2409            The size of this array is equal to the number of local rows, i.e., 'm'.
2410            You must leave room for the diagonal entry even if it is zero.
2411 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2412            submatrix (same value is used for all local rows).
2413 -  o_nnz - array containing the number of nonzeros in the various rows of the
2414            OFF-DIAGONAL portion of the local submatrix (possibly different for
2415            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2416            structure. The size of this array is equal to the number
2417            of local rows, i.e., 'm'.
2418 
2419    Output Parameter:
2420 .  A - the matrix
2421 
2422    Notes:
2423    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2424    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2425    storage requirements for this matrix.
2426 
2427    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2428    processor then it must be used on all processors that share the object for
2429    that argument.
2430 
2431    The AIJ format (also called the Yale sparse matrix format or
2432    compressed row storage), is fully compatible with standard Fortran 77
2433    storage.  That is, the stored row and column indices can begin at
2434    either one (as in Fortran) or zero.  See the users manual for details.
2435 
2436    The user MUST specify either the local or global matrix dimensions
2437    (possibly both).
2438 
2439    The parallel matrix is partitioned such that the first m0 rows belong to
2440    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2441    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2442 
2443    The DIAGONAL portion of the local submatrix of a processor can be defined
2444    as the submatrix which is obtained by extracting the part corresponding
2445    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2446    first row that belongs to the processor, and r2 is the last row belonging
2447    to this processor. This is a square mxm matrix. The remaining portion
2448    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2449 
2450    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2451 
2452    When calling this routine with a single process communicator, a matrix of
2453    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2454    type of communicator, use the construction mechanism:
2455      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2456 
2457    By default, this format uses inodes (identical nodes) when possible.
2458    We search for consecutive rows with the same nonzero structure, thereby
2459    reusing matrix information to achieve increased efficiency.
2460 
2461    Options Database Keys:
2462 +  -mat_aij_no_inode  - Do not use inodes
2463 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2464 -  -mat_aij_oneindex - Internally use indexing starting at 1
2465         rather than 0.  Note that when calling MatSetValues(),
2466         the user still MUST index entries starting at 0!
2467 
2468 
2469    Example usage:
2470 
2471    Consider the following 8x8 matrix with 34 non-zero values, that is
2472    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2473    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2474    as follows:
2475 
2476 .vb
2477             1  2  0  |  0  3  0  |  0  4
2478     Proc0   0  5  6  |  7  0  0  |  8  0
2479             9  0 10  | 11  0  0  | 12  0
2480     -------------------------------------
2481            13  0 14  | 15 16 17  |  0  0
2482     Proc1   0 18  0  | 19 20 21  |  0  0
2483             0  0  0  | 22 23  0  | 24  0
2484     -------------------------------------
2485     Proc2  25 26 27  |  0  0 28  | 29  0
2486            30  0  0  | 31 32 33  |  0 34
2487 .ve
2488 
2489    This can be represented as a collection of submatrices as:
2490 
2491 .vb
2492       A B C
2493       D E F
2494       G H I
2495 .ve
2496 
2497    Where the submatrices A,B,C are owned by proc0, D,E,F are
2498    owned by proc1, G,H,I are owned by proc2.
2499 
2500    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2501    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2502    The 'M','N' parameters are 8,8, and have the same values on all procs.
2503 
2504    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2505    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2506    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2507    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2508    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2509    matrix, and [DF] as another SeqAIJ matrix.
2510 
2511    When d_nz, o_nz parameters are specified, d_nz storage elements are
2512    allocated for every row of the local diagonal submatrix, and o_nz
2513    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2514    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2515    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2516    In this case, the values of d_nz,o_nz are:
2517 .vb
2518      proc0 : d_nz = 2, o_nz = 2
2519      proc1 : d_nz = 3, o_nz = 2
2520      proc2 : d_nz = 1, o_nz = 4
2521 .ve
2522    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2523    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2524    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2525    34 values.
2526 
2527    When d_nnz, o_nnz parameters are specified, the storage is specified
2528    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2529    In the above case the values for d_nnz,o_nnz are:
2530 .vb
2531      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2532      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2533      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2534 .ve
2535    Here the space allocated is the sum of all the above values, i.e., 34, and
2536    hence preallocation is perfect.
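
   A minimal usage sketch for proc0 in the example above (the communicator comm
   is an assumption for illustration):

.vb
     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};  /* proc0's values from the example */
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve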
2537 
2538    Level: intermediate
2539 
2540 .keywords: matrix, aij, compressed row, sparse, parallel
2541 
2542 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2543 @*/
2544 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2545 {
2546   int ierr,size;
2547 
2548   PetscFunctionBegin;
2549   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2550   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2551   if (size > 1) {
2552     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2553     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2554   } else {
2555     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2556     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2563 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2564 {
2565   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2566   PetscFunctionBegin;
2567   *Ad     = a->A;
2568   *Ao     = a->B;
2569   *colmap = a->garray;
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 #undef __FUNCT__
2574 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2575 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2576 {
2577   int        ierr,i;
2578   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2579 
2580   PetscFunctionBegin;
2581   if (coloring->ctype == IS_COLORING_LOCAL) {
2582     ISColoringValue *allcolors,*colors;
2583     ISColoring      ocoloring;
2584 
2585     /* set coloring for diagonal portion */
2586     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2587 
2588     /* set coloring for off-diagonal portion */
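    /* gather the complete global color array on every process, then select the
       colors of the ghost columns listed in garray for the local B block */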
2589     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2590     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2591     for (i=0; i<a->B->n; i++) {
2592       colors[i] = allcolors[a->garray[i]];
2593     }
2594     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2595     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2596     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2597     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2598   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2599     ISColoringValue *colors;
2600     int             *larray;
2601     ISColoring      ocoloring;
2602 
2603     /* set coloring for diagonal portion */
2604     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2605     for (i=0; i<a->A->n; i++) {
2606       larray[i] = i + a->cstart;
2607     }
2608     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2609     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2610     for (i=0; i<a->A->n; i++) {
2611       colors[i] = coloring->colors[larray[i]];
2612     }
2613     ierr = PetscFree(larray);CHKERRQ(ierr);
2614     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2615     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2616     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2617 
2618     /* set coloring for off-diagonal portion */
2619     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2620     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2621     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2622     for (i=0; i<a->B->n; i++) {
2623       colors[i] = coloring->colors[larray[i]];
2624     }
2625     ierr = PetscFree(larray);CHKERRQ(ierr);
2626     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2627     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2628     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2629   } else {
2630     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2631   }
2632 
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 #undef __FUNCT__
2637 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2638 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2639 {
2640   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2641   int        ierr;
2642 
2643   PetscFunctionBegin;
2644   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2645   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 #undef __FUNCT__
2650 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2651 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2652 {
2653   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2654   int        ierr;
2655 
2656   PetscFunctionBegin;
2657   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2658   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatMerge"
2664 /*@C
2665       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2666                  matrices from each processor
2667 
2668     Collective on MPI_Comm
2669 
2670    Input Parameters:
2671 +    comm - the communicator the parallel matrix will live on
2672 -    inmat - the input sequential matrices
2673 
2674    Output Parameter:
2675 .    outmat - the parallel matrix generated
2676 
2677     Level: advanced
2678 
2679    Notes: The number of columns of the matrix on EACH of the separate processors
2680       MUST be the same. The input matrix is destroyed on return.
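
   A usage sketch (hypothetical; each process contributes one sequential matrix):

.vb
     Mat seq,par;
     /* ... assemble the local sequential matrix seq on each process ... */
     ierr = MatMerge(comm,seq,&par);CHKERRQ(ierr);  /* seq is destroyed by the call */
.ve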
2681 
2682 @*/
2683 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2684 {
2685   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2686   PetscScalar *values;
2687   PetscMap    columnmap,rowmap;
2688 
2689   PetscFunctionBegin;
2690 
2691   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2692 
2693   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2694   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2695   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2696   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2697   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2698   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2699 
2700   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2701   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2702   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2703   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2704   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2705 
2706   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2707   for (i=0;i<m;i++) {
2708     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2709     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2710     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2711   }
2712   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2713   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2714 
2715   for (i=0;i<m;i++) {
2716     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2717     I    = i + rstart;
2718     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2719     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2720   }
2721   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2722   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2723   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2724 
2725   PetscFunctionReturn(0);
2726 }
2727 
2728 #undef __FUNCT__
2729 #define __FUNCT__ "MatFileSplit"
2730 int MatFileSplit(Mat A,char *outfile)
2731 {
2732   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2733   PetscViewer out;
2734   char        *name;
2735   Mat         B;
2736   PetscScalar *values;
2737 
2738   PetscFunctionBegin;
2739 
2740   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2741   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2742   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2743   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2744   for (i=0;i<m;i++) {
2745     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2746     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2747     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2748   }
2749   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2750   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2751 
2752   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2753   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2754   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".<rank>" with many digits */
2755   sprintf(name,"%s.%d",outfile,rank);
2756   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2757   ierr = PetscFree(name);CHKERRQ(ierr);
2758   ierr = MatView(B,out);CHKERRQ(ierr);
2759   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2760   ierr = MatDestroy(B);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763