xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a23d5ecec69aaee6ed47a85ba9d72960301bf762)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order N integer array) but it is fast to access.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
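/*
   A hedged usage sketch (an addition, not part of the original file): how the
   colmap built above is consulted to translate a global column number into a
   local index into the off-diagonal block B, mirroring the lookups done in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.
*/
#if 0
static int ExampleGlobalToLocalColumn(Mat_MPIAIJ *aij,int gcol,int *lcol)
{
#if defined (PETSC_USE_CTABLE)
  int ierr;
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                      /* the table stores indices shifted by one */
#else
  *lcol = aij->colmap[gcol] - 1;  /* a result of -1 means "not present in B" */
#endif
  return 0;
}
#endif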
42 
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
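/*
   A hedged sketch (an addition, not original code) of the insertion strategy
   both macros above implement: a short binary search narrows the sorted row
   to a window of at most five entries, a linear scan locates the exact slot,
   and later entries are shifted up one place to make room.  The real macros
   additionally handle ADD_VALUES, the nonew flags, and the CHUNKSIZE
   reallocation; this sketch assumes the row already has spare room.
*/
#if 0
static void ExampleInsertIntoSortedRow(int *rp,PetscScalar *ap,int *nrow,int col,PetscScalar value)
{
  int low = 0,high = *nrow,t,i,ii;
  while (high-low > 5) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] == col) {ap[i] = value; return;}  /* entry exists: overwrite */
    if (rp[i] > col) break;
  }
  for (ii=*nrow-1; ii>=i; ii--) {               /* shift later entries up */
    rp[ii+1] = rp[ii];
    ap[ii+1] = ap[ii];
  }
  rp[i] = col; ap[i] = value; (*nrow)++;
}
#endif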
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the macros above */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift=0;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243             col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
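/*
   Hedged usage sketch (an addition): callers pass GLOBAL indices.  Rows owned
   by this process are split between the diagonal block A and the off-diagonal
   block B above; rows owned elsewhere are stashed and exchanged at assembly.
*/
#if 0
static int ExampleSetOneEntry(Mat mat,int grow,int gcol,PetscScalar v)
{
  int ierr;
  ierr = MatSetValues(mat,1,&grow,1,&gcol,&v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif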
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col--;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
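/*
   Hedged usage sketch (an addition): MatGetValues_MPIAIJ() above only serves
   locally owned rows, so a careful caller restricts its queries to the
   ownership range.
*/
#if 0
static int ExampleGetLocalEntry(Mat mat,int grow,int gcol,PetscScalar *v)
{
  int ierr,rstart,rend;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  if (grow >= rstart && grow < rend) {
    ierr = MatGetValues(mat,1,&grow,1,&gcol,v);CHKERRQ(ierr);
  }
  return 0;
}
#endif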
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
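/*
   A clarifying note (an addition) on why the MPI_BOR reduction above detects
   mixed insert modes, assuming the usual InsertMode encoding in which
   NOT_SET_VALUES is zero and INSERT_VALUES and ADD_VALUES are distinct
   nonzero values: processes with no cached values contribute 0 and drop out
   of the bitwise OR, a uniform choice reproduces itself, and any mixture of
   the two modes yields ADD_VALUES|INSERT_VALUES, which matches neither mode
   and trips the error check.
*/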
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
353   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled; if so we must
385      also disassemble ourselves so that we may reassemble. */
386   /*
387      if nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled thus we can skip this stuff
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412 #if defined(PETSC_HAVE_SUPERLUDIST)
413   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
414   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
415 #endif
416 
417 #if defined(PETSC_HAVE_SPOOLES)
418   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
419   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
420 #endif
421 
422 #if defined(PETSC_HAVE_MUMPS)
423   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
424   if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
425 #endif
426   PetscFunctionReturn(0);
427 }
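/*
   Hedged usage sketch (an addition): assembly is split into a begin/end pair
   so that the stash traffic started in MatAssemblyBegin_MPIAIJ() can overlap
   with unrelated work the caller does in between.
*/
#if 0
static int ExampleAssemble(Mat mat)
{
  int ierr;
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* ... independent local computation may be performed here ... */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif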
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
431 int MatZeroEntries_MPIAIJ(Mat A)
432 {
433   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
434   int        ierr;
435 
436   PetscFunctionBegin;
437   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
438   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 #undef __FUNCT__
443 #define __FUNCT__ "MatZeroRows_MPIAIJ"
444 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
445 {
446   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
447   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
448   int            *nprocs,j,idx,nsends,row;
449   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
450   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
451   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
452   MPI_Comm       comm = A->comm;
453   MPI_Request    *send_waits,*recv_waits;
454   MPI_Status     recv_status,*send_status;
455   IS             istmp;
456   PetscTruth     found;
457 
458   PetscFunctionBegin;
459   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
460   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
461 
462   /*  first count number of contributors to each processor */
463   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
464   ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
465   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
466   for (i=0; i<N; i++) {
467     idx = rows[i];
468     found = PETSC_FALSE;
469     for (j=0; j<size; j++) {
470       if (idx >= owners[j] && idx < owners[j+1]) {
471         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
472       }
473     }
474     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
475   }
476   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
477 
478   /* inform other processors of number of messages and max length*/
479   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
480 
481   /* post receives:   */
482   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
484   for (i=0; i<nrecvs; i++) {
485     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
486   }
487 
488   /* do sends:
489       1) starts[i] gives the starting index in svalues for stuff going to
490          the ith processor
491   */
492   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
493   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
494   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
495   starts[0] = 0;
496   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
497   for (i=0; i<N; i++) {
498     svalues[starts[owner[i]]++] = rows[i];
499   }
500   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
501 
502   starts[0] = 0;
503   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
504   count = 0;
505   for (i=0; i<size; i++) {
506     if (nprocs[2*i+1]) {
507       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
508     }
509   }
510   ierr = PetscFree(starts);CHKERRQ(ierr);
511 
512   base = owners[rank];
513 
514   /*  wait on receives */
515   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
516   source = lens + nrecvs;
517   count  = nrecvs; slen = 0;
518   while (count) {
519     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
520     /* unpack receives into our local space */
521     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
522     source[imdex]  = recv_status.MPI_SOURCE;
523     lens[imdex]    = n;
524     slen          += n;
525     count--;
526   }
527   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
528 
529   /* move the data into the send scatter */
530   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
531   count = 0;
532   for (i=0; i<nrecvs; i++) {
533     values = rvalues + i*nmax;
534     for (j=0; j<lens[i]; j++) {
535       lrows[count++] = values[j] - base;
536     }
537   }
538   ierr = PetscFree(rvalues);CHKERRQ(ierr);
539   ierr = PetscFree(lens);CHKERRQ(ierr);
540   ierr = PetscFree(owner);CHKERRQ(ierr);
541   ierr = PetscFree(nprocs);CHKERRQ(ierr);
542 
543   /* actually zap the local rows */
544   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
545   PetscLogObjectParent(A,istmp);
546 
547   /*
548         Zero the required rows. If the "diagonal block" of the matrix
549      is square and the user wishes to set the diagonal we use separate
550      code so that MatSetValues() is not called once per diagonal entry,
551      which would allocate new memory repeatedly and slow things down.
552 
553        Contributed by: Mathew Knepley
554   */
555   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
556   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
557   if (diag && (l->A->M == l->A->N)) {
558     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
559   } else if (diag) {
560     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
561     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
562       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
563 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
564     }
565     for (i = 0; i < slen; i++) {
566       row  = lrows[i] + rstart;
567       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
568     }
569     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
570     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
571   } else {
572     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
573   }
574   ierr = ISDestroy(istmp);CHKERRQ(ierr);
575   ierr = PetscFree(lrows);CHKERRQ(ierr);
576 
577   /* wait on sends */
578   if (nsends) {
579     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
580     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
581     ierr = PetscFree(send_status);CHKERRQ(ierr);
582   }
583   ierr = PetscFree(send_waits);CHKERRQ(ierr);
584   ierr = PetscFree(svalues);CHKERRQ(ierr);
585 
586   PetscFunctionReturn(0);
587 }
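/*
   Hedged usage sketch (an addition, using this era's calling sequence): any
   process may list any global rows; the routine above routes each row to its
   owner.  Passing a non-null diag places that value on the diagonal of every
   zeroed row.
*/
#if 0
static int ExampleZeroRows(Mat A,int n,int *grows,PetscScalar diag)
{
  IS  is;
  int ierr;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,n,grows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
  ierr = ISDestroy(is);CHKERRQ(ierr);
  return 0;
}
#endif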
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMult_MPIAIJ"
591 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
594   int        ierr,nt;
595 
596   PetscFunctionBegin;
597   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
598   if (nt != A->n) {
599     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
600   }
601   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
603   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
604   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
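/*
   Worked form of the overlap in MatMult_MPIAIJ() above (a clarifying note,
   recoverable directly from the code): with x split into the locally owned
   part and the ghost part gathered into lvec,

       yy = A_diag * x_local + B_offdiag * x_ghost

   The scatter that fills lvec is started first, the purely local product
   A_diag*x_local runs while the messages are in flight, and the B term is
   added only after VecScatterEnd() completes.
*/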
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMultAdd_MPIAIJ"
610 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
611 {
612   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
613   int        ierr;
614 
615   PetscFunctionBegin;
616   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
617   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
618   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
619   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
625 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
626 {
627   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
628   int        ierr;
629 
630   PetscFunctionBegin;
631   /* do nondiagonal part */
632   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
633   /* send it on its way */
634   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
635   /* do local part */
636   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
637   /* receive remote parts: note this assumes the values are not actually */
638   /* inserted in yy until the next line, which is true for this implementation */
639   /* but may not always be true. */
640   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
641   PetscFunctionReturn(0);
642 }
643 
644 EXTERN_C_BEGIN
645 #undef __FUNCT__
646 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
647 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
648 {
649   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
650   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
651   MatType    type;
652   IS         Me,Notme;
653   int        M,N,first,last,*notme,i, ierr;
654 
655   PetscFunctionBegin;
656 
657   /* Compatible types */
658   ierr = MatGetType(Bmat,&type); CHKERRQ(ierr);
659   ierr = PetscStrcmp(type,MATMPIAIJ,f); CHKERRQ(ierr);
660   if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too");
661 
662   /* Easy test: symmetric diagonal block */
663   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
664   ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
665   if (!*f) PetscFunctionReturn(0);
666 
667   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
668   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
669   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
670   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
671   for (i=0; i<first; i++) notme[i] = i;
672   for (i=last; i<M; i++) notme[i-last+first] = i;
673   ierr = ISCreateGeneral
674     (MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
675   ierr = ISCreateStride
676     (MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
677   ierr = MatGetSubMatrices
678     (Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
679   Aoff = Aoffs[0];
680   ierr = MatGetSubMatrices
681     (Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
682   Boff = Boffs[0];
683   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
684   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
685   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
686   ierr = ISDestroy(Me); CHKERRQ(ierr);
687   ierr = ISDestroy(Notme); CHKERRQ(ierr);
688 
689   PetscFunctionReturn(0);
690 }
691 EXTERN_C_END
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
695 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
696 {
697   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
698   int        ierr;
699 
700   PetscFunctionBegin;
701   /* do nondiagonal part */
702   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
703   /* send it on its way */
704   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
705   /* do local part */
706   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
707   /* receive remote parts: note this assumes the values are not actually */
708   /* inserted in zz until the next line, which is true for this implementation */
709   /* but may not always be true. */
710   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
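/*
   Worked form of the transpose products above (a clarifying note): rows of
   A^T are columns of A, so each process first forms B^T * x_local (its
   contribution to rows owned by OTHER processes), scatter-adds those pieces
   in reverse while computing its own local part, giving

       zz = yy + A_diag^T * x_local + (scatter-added B^T * x_local pieces)
*/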
713 
714 /*
715   This only works correctly for square matrices where the subblock A->A is the
716    diagonal block
717 */
718 #undef __FUNCT__
719 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
720 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
721 {
722   int        ierr;
723   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
724 
725   PetscFunctionBegin;
726   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
727   if (a->rstart != a->cstart || a->rend != a->cend) {
728     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
729   }
730   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
731   PetscFunctionReturn(0);
732 }
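/*
   Hedged usage sketch (an addition): the vector must share the matrix row
   layout, since only the diagonal block a->A is consulted above.
*/
#if 0
static int ExampleGetDiagonal(Mat A,MPI_Comm comm,Vec *d)
{
  int ierr,m,n;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,m,PETSC_DETERMINE,d);CHKERRQ(ierr);
  ierr = MatGetDiagonal(A,*d);CHKERRQ(ierr);
  return 0;
}
#endif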
733 
734 #undef __FUNCT__
735 #define __FUNCT__ "MatScale_MPIAIJ"
736 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
737 {
738   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
739   int        ierr;
740 
741   PetscFunctionBegin;
742   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
743   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNCT__
748 #define __FUNCT__ "MatDestroy_MPIAIJ"
749 int MatDestroy_MPIAIJ(Mat mat)
750 {
751   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
752   int        ierr;
753 
754   PetscFunctionBegin;
755 #if defined(PETSC_USE_LOG)
756   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
757 #endif
758   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
759   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
760   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
761   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
762 #if defined (PETSC_USE_CTABLE)
763   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
764 #else
765   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
766 #endif
767   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
768   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
769   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
770   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
771   ierr = PetscFree(aij);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
776 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
777 extern int MatFactorInfo_MUMPS(Mat,PetscViewer);
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatView_MPIAIJ_Binary"
781 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
782 {
783   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
784   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
785   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
786   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
787   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
788   PetscScalar       *column_values;
789 
790   PetscFunctionBegin;
791   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
792   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
793   nz   = A->nz + B->nz;
794   if (rank == 0) {
795     header[0] = MAT_FILE_COOKIE;
796     header[1] = mat->M;
797     header[2] = mat->N;
798     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
799     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
800     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
801     /* get largest number of rows any processor has */
802     rlen = mat->m;
803     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
804     for (i=1; i<size; i++) {
805       rlen = PetscMax(rlen,range[i+1] - range[i]);
806     }
807   } else {
808     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
809     rlen = mat->m;
810   }
811 
812   /* load up the local row counts */
813   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
814   for (i=0; i<mat->m; i++) {
815     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
816   }
817 
818   /* store the row lengths to the file */
819   if (rank == 0) {
820     MPI_Status status;
821     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
822     for (i=1; i<size; i++) {
823       rlen = range[i+1] - range[i];
824       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
825       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
826     }
827   } else {
828     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
829   }
830   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
831 
832   /* load up the local column indices */
833   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
834   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
835   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
836   cnt  = 0;
837   for (i=0; i<mat->m; i++) {
838     for (j=B->i[i]; j<B->i[i+1]; j++) {
839       if ( (col = garray[B->j[j]]) > cstart) break;
840       column_indices[cnt++] = col;
841     }
842     for (k=A->i[i]; k<A->i[i+1]; k++) {
843       column_indices[cnt++] = A->j[k] + cstart;
844     }
845     for (; j<B->i[i+1]; j++) {
846       column_indices[cnt++] = garray[B->j[j]];
847     }
848   }
849   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
850 
851   /* store the column indices to the file */
852   if (rank == 0) {
853     MPI_Status status;
854     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
855     for (i=1; i<size; i++) {
856       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
857       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
858       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
859       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
860     }
861   } else {
862     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
863     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(column_indices);CHKERRQ(ierr);
866 
867   /* load up the local column values */
868   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
869   cnt  = 0;
870   for (i=0; i<mat->m; i++) {
871     for (j=B->i[i]; j<B->i[i+1]; j++) {
872       if ( garray[B->j[j]] > cstart) break;
873       column_values[cnt++] = B->a[j];
874     }
875     for (k=A->i[i]; k<A->i[i+1]; k++) {
876       column_values[cnt++] = A->a[k];
877     }
878     for (; j<B->i[i+1]; j++) {
879       column_values[cnt++] = B->a[j];
880     }
881   }
882   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
883 
884   /* store the column values to the file */
885   if (rank == 0) {
886     MPI_Status status;
887     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
888     for (i=1; i<size; i++) {
889       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
890       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
891       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
892       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
893     }
894   } else {
895     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
896     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
897   }
898   ierr = PetscFree(column_values);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
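/*
   Sketch of the binary layout produced above (a clarifying note, recoverable
   directly from the code): rank 0 writes everything; the other ranks stream
   their pieces to rank 0 with plain MPI sends:

     int         header[4]       = {MAT_FILE_COOKIE, M, N, total nz};
     int         rowlengths[M];     gathered process by process
     int         colindices[nz];    global columns, sorted within each row
     PetscScalar values[nz];        matching the column indices
*/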
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
904 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
905 {
906   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
907   int               ierr,rank = aij->rank,size = aij->size;
908   PetscTruth        isdraw,isascii,flg,isbinary;
909   PetscViewer       sviewer;
910   PetscViewerFormat format;
911 
912   PetscFunctionBegin;
913   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
914   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
915   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
916   if (isascii) {
917     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
918     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
919       MatInfo info;
920       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
921       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
922       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
923       if (flg) {
924         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
925                                                   rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
926       } else {
927         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
928                                                   rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
929       }
930       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
931       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
932       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
933       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
934       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
935       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
936       PetscFunctionReturn(0);
937     } else if (format == PETSC_VIEWER_ASCII_INFO) {
938       PetscFunctionReturn(0);
939     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
940 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
941       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
942 #endif
943 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
944       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
945 #endif
946 #if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE)
947       ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr);
948 #endif
949       PetscFunctionReturn(0);
950     }
951   } else if (isbinary) {
952     if (size == 1) {
953       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
954       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
955     } else {
956       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
957     }
958     PetscFunctionReturn(0);
959   } else if (isdraw) {
960     PetscDraw  draw;
961     PetscTruth isnull;
962     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
963     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
964   }
965 
966   if (size == 1) {
967     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
968     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
969   } else {
970     /* assemble the entire matrix onto first processor. */
971     Mat         A;
972     Mat_SeqAIJ *Aloc;
973     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
974     PetscScalar *a;
975 
976     if (!rank) {
977       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
978     } else {
979       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
980     }
981     PetscLogObjectParent(mat,A);
982 
983     /* copy over the A part */
984     Aloc = (Mat_SeqAIJ*)aij->A->data;
985     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
986     row = aij->rstart;
987     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
988     for (i=0; i<m; i++) {
989       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
990       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
991     }
992     aj = Aloc->j;
993     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
994 
995     /* copy over the B part */
996     Aloc = (Mat_SeqAIJ*)aij->B->data;
997     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
998     row  = aij->rstart;
999     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
1000     ct   = cols;
1001     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1002     for (i=0; i<m; i++) {
1003       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1004       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1005     }
1006     ierr = PetscFree(ct);CHKERRQ(ierr);
1007     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1008     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1009     /*
1010        Everyone has to call to draw the matrix since the graphics waits are
1011        synchronized across all processors that share the PetscDraw object
1012     */
1013     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1014     if (!rank) {
1015       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1016       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1017     }
1018     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1019     ierr = MatDestroy(A);CHKERRQ(ierr);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatView_MPIAIJ"
1026 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1027 {
1028   int        ierr;
1029   PetscTruth isascii,isdraw,issocket,isbinary;
1030 
1031   PetscFunctionBegin;
1032   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1033   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1034   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1035   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1036   if (isascii || isdraw || isbinary || issocket) {
1037     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1038   } else {
1039     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1040   }
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatRelax_MPIAIJ"
1048 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1049 {
1050   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1051   int          ierr;
1052   Vec          bb1;
1053   PetscScalar  mone=-1.0;
1054 
1055   PetscFunctionBegin;
1056   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1057 
1058   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1059 
1060   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1061     if (flag & SOR_ZERO_INITIAL_GUESS) {
1062       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1063       its--;
1064     }
1065 
1066     while (its--) {
1067       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069 
1070       /* update rhs: bb1 = bb - B*x */
1071       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1072       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1073 
1074       /* local sweep */
1075       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1076       CHKERRQ(ierr);
1077     }
1078   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1079     if (flag & SOR_ZERO_INITIAL_GUESS) {
1080       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1081       its--;
1082     }
1083     while (its--) {
1084       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1085       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1086 
1087       /* update rhs: bb1 = bb - B*x */
1088       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1089       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1090 
1091       /* local sweep */
1092       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1093       CHKERRQ(ierr);
1094     }
1095   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1096     if (flag & SOR_ZERO_INITIAL_GUESS) {
1097       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1098       its--;
1099     }
1100     while (its--) {
1101       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1102       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1103 
1104       /* update rhs: bb1 = bb - B*x */
1105       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1106       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1107 
1108       /* local sweep */
1109       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1110       CHKERRQ(ierr);
1111     }
1112   } else {
1113     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1114   }
1115 
1116   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
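/*
   Worked form of the local relaxation above (a clarifying note): the ghost
   part of xx is gathered into lvec and the off-process coupling is moved to
   the right hand side,

       bb1 = bb - B * x_ghost,

   after which lits sweeps of SOR are applied to the LOCAL system
   A_diag x = bb1.  Refreshing bb1 once per outer iteration makes this a
   block Jacobi outer method with SOR as the subdomain solver.
*/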
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1122 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1123 {
1124   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1125   Mat        A = mat->A,B = mat->B;
1126   int        ierr;
1127   PetscReal  isend[5],irecv[5];
1128 
1129   PetscFunctionBegin;
1130   info->block_size     = 1.0;
1131   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1132   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1133   isend[3] = info->memory;  isend[4] = info->mallocs;
1134   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1135   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1136   isend[3] += info->memory;  isend[4] += info->mallocs;
1137   if (flag == MAT_LOCAL) {
1138     info->nz_used      = isend[0];
1139     info->nz_allocated = isend[1];
1140     info->nz_unneeded  = isend[2];
1141     info->memory       = isend[3];
1142     info->mallocs      = isend[4];
1143   } else if (flag == MAT_GLOBAL_MAX) {
1144     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1145     info->nz_used      = irecv[0];
1146     info->nz_allocated = irecv[1];
1147     info->nz_unneeded  = irecv[2];
1148     info->memory       = irecv[3];
1149     info->mallocs      = irecv[4];
1150   } else if (flag == MAT_GLOBAL_SUM) {
1151     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1152     info->nz_used      = irecv[0];
1153     info->nz_allocated = irecv[1];
1154     info->nz_unneeded  = irecv[2];
1155     info->memory       = irecv[3];
1156     info->mallocs      = irecv[4];
1157   }
1158   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1159   info->fill_ratio_needed = 0;
1160   info->factor_mallocs    = 0;
1161   info->rows_global       = (double)matin->M;
1162   info->columns_global    = (double)matin->N;
1163   info->rows_local        = (double)matin->m;
1164   info->columns_local     = (double)matin->n;
1165 
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatSetOption_MPIAIJ"
1171 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1172 {
1173   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1174   int        ierr;
1175 
1176   PetscFunctionBegin;
1177   switch (op) {
1178   case MAT_NO_NEW_NONZERO_LOCATIONS:
1179   case MAT_YES_NEW_NONZERO_LOCATIONS:
1180   case MAT_COLUMNS_UNSORTED:
1181   case MAT_COLUMNS_SORTED:
1182   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1183   case MAT_KEEP_ZEROED_ROWS:
1184   case MAT_NEW_NONZERO_LOCATION_ERR:
1185   case MAT_USE_INODES:
1186   case MAT_DO_NOT_USE_INODES:
1187   case MAT_IGNORE_ZERO_ENTRIES:
1188     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1189     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1190     break;
1191   case MAT_ROW_ORIENTED:
1192     a->roworiented = PETSC_TRUE;
1193     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1194     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1195     break;
1196   case MAT_ROWS_SORTED:
1197   case MAT_ROWS_UNSORTED:
1198   case MAT_YES_NEW_DIAGONALS:
1199     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1200     break;
1201   case MAT_COLUMN_ORIENTED:
1202     a->roworiented = PETSC_FALSE;
1203     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1204     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1205     break;
1206   case MAT_IGNORE_OFF_PROC_ENTRIES:
1207     a->donotstash = PETSC_TRUE;
1208     break;
1209   case MAT_NO_NEW_DIAGONALS:
1210     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1211   default:
1212     SETERRQ(PETSC_ERR_SUP,"unknown option");
1213   }
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatGetRow_MPIAIJ"
1219 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1220 {
1221   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1222   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1223   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1224   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1225   int          *cmap,*idx_p;
1226 
1227   PetscFunctionBegin;
1228   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1229   mat->getrowactive = PETSC_TRUE;
1230 
1231   if (!mat->rowvalues && (idx || v)) {
1232     /*
1233         allocate enough space to hold information from the longest row.
1234     */
1235     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1236     int     max = 1,tmp;
1237     for (i=0; i<matin->m; i++) {
1238       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1239       if (max < tmp) { max = tmp; }
1240     }
1241     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1242     mat->rowindices = (int*)(mat->rowvalues + max);
1243   }
1244 
1245   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1246   lrow = row - rstart;
1247 
1248   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1249   if (!v)   {pvA = 0; pvB = 0;}
1250   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1251   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1252   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1253   nztot = nzA + nzB;
1254 
1255   cmap  = mat->garray;
1256   if (v  || idx) {
1257     if (nztot) {
1258       /* Sort by increasing column numbers, assuming A and B already sorted */
1259       int imark = -1;
1260       if (v) {
1261         *v = v_p = mat->rowvalues;
1262         for (i=0; i<nzB; i++) {
1263           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1264           else break;
1265         }
1266         imark = i;
1267         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1268         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1269       }
1270       if (idx) {
1271         *idx = idx_p = mat->rowindices;
1272         if (imark > -1) {
1273           for (i=0; i<imark; i++) {
1274             idx_p[i] = cmap[cworkB[i]];
1275           }
1276         } else {
1277           for (i=0; i<nzB; i++) {
1278             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1279             else break;
1280           }
1281           imark = i;
1282         }
1283         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1284         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1285       }
1286     } else {
1287       if (idx) *idx = 0;
1288       if (v)   *v   = 0;
1289     }
1290   }
1291   *nz = nztot;
1292   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1293   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
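/*
   Hedged usage sketch (an addition): only one row may be held at a time
   (note the getrowactive guard above) and only locally owned rows may be
   requested.
*/
#if 0
static int ExampleScanLocalRows(Mat mat)
{
  int         ierr,rstart,rend,row,nz,*cols;
  PetscScalar *vals;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect nz, cols[], vals[] here ... */
    ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  return 0;
}
#endif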
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1299 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1300 {
1301   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1302 
1303   PetscFunctionBegin;
1304   if (aij->getrowactive == PETSC_FALSE) {
1305     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1306   }
1307   aij->getrowactive = PETSC_FALSE;
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatNorm_MPIAIJ"
1313 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1314 {
1315   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1316   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1317   int          ierr,i,j,cstart = aij->cstart;
1318   PetscReal    sum = 0.0;
1319   PetscScalar  *v;
1320 
1321   PetscFunctionBegin;
1322   if (aij->size == 1) {
1323     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1324   } else {
1325     if (type == NORM_FROBENIUS) {
1326       v = amat->a;
1327       for (i=0; i<amat->nz; i++) {
1328 #if defined(PETSC_USE_COMPLEX)
1329         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1330 #else
1331         sum += (*v)*(*v); v++;
1332 #endif
1333       }
1334       v = bmat->a;
1335       for (i=0; i<bmat->nz; i++) {
1336 #if defined(PETSC_USE_COMPLEX)
1337         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1338 #else
1339         sum += (*v)*(*v); v++;
1340 #endif
1341       }
1342       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1343       *norm = sqrt(*norm);
1344     } else if (type == NORM_1) { /* max column norm */
1345       PetscReal *tmp,*tmp2;
1346       int    *jj,*garray = aij->garray;
1347       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1348       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1349       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1350       *norm = 0.0;
1351       v = amat->a; jj = amat->j;
1352       for (j=0; j<amat->nz; j++) {
1353         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1354       }
1355       v = bmat->a; jj = bmat->j;
1356       for (j=0; j<bmat->nz; j++) {
1357         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1358       }
1359       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1360       for (j=0; j<mat->N; j++) {
1361         if (tmp2[j] > *norm) *norm = tmp2[j];
1362       }
1363       ierr = PetscFree(tmp);CHKERRQ(ierr);
1364       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1365     } else if (type == NORM_INFINITY) { /* max row norm */
1366       PetscReal ntemp = 0.0;
1367       for (j=0; j<aij->A->m; j++) {
1368         v = amat->a + amat->i[j];
1369         sum = 0.0;
1370         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1371           sum += PetscAbsScalar(*v); v++;
1372         }
1373         v = bmat->a + bmat->i[j];
1374         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1375           sum += PetscAbsScalar(*v); v++;
1376         }
1377         if (sum > ntemp) ntemp = sum;
1378       }
1379       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1380     } else {
1381       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1382     }
1383   }
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatTranspose_MPIAIJ"
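/*
   Forms the transpose by inserting each local row of the diagonal and
   off-diagonal blocks as a column of a new parallel matrix; the new matrix
   is created without preallocation, so this is not particularly efficient.
   In-place transposition (matout == PETSC_NULL) is permitted only for
   square matrices.
*/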
1389 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1390 {
1391   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1392   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1393   int          ierr;
1394   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1395   Mat          B;
1396   PetscScalar  *array;
1397 
1398   PetscFunctionBegin;
1399   if (!matout && M != N) {
1400     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1401   }
1402 
1403   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1404 
1405   /* copy over the A part */
1406   Aloc = (Mat_SeqAIJ*)a->A->data;
1407   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1408   row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
1410   for (i=0; i<m; i++) {
1411     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1412     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1413   }
1414   aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}
1416 
1417   /* copy over the B part */
1418   Aloc = (Mat_SeqAIJ*)a->B->data;
1419   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1420   row  = a->rstart;
1421   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1422   ct   = cols;
1423   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1424   for (i=0; i<m; i++) {
1425     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1426     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1427   }
1428   ierr = PetscFree(ct);CHKERRQ(ierr);
1429   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1430   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1431   if (matout) {
1432     *matout = B;
1433   } else {
1434     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
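/*
   Scales the rows by ll and the columns by rr. The scatter of rr into the
   ghosted work vector lvec (needed to scale the off-diagonal block) is
   overlapped with scaling the diagonal block.
*/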
1441 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1442 {
1443   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1444   Mat        a = aij->A,b = aij->B;
1445   int        ierr,s1,s2,s3;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1449   if (rr) {
1450     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1451     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1452     /* Overlap communication with computation. */
1453     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1454   }
1455   if (ll) {
1456     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1457     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1458     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1459   }
  /* scale the diagonal block */
1461   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1462 
1463   if (rr) {
1464     /* Do a scatter end and then right scale the off-diagonal block */
1465     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1466     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1467   }
1468 
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1475 int MatPrintHelp_MPIAIJ(Mat A)
1476 {
1477   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1478   int        ierr;
1479 
1480   PetscFunctionBegin;
1481   if (!a->rank) {
1482     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1483   }
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1489 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1490 {
1491   PetscFunctionBegin;
1492   *bs = 1;
1493   PetscFunctionReturn(0);
1494 }
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1497 int MatSetUnfactored_MPIAIJ(Mat A)
1498 {
1499   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1500   int        ierr;
1501 
1502   PetscFunctionBegin;
1503   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatEqual_MPIAIJ"
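/*
   Two MPIAIJ matrices are equal if their diagonal and off-diagonal blocks
   are equal on every processor; the local results are combined with a
   logical AND over the communicator.
*/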
1509 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1510 {
1511   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1512   Mat        a,b,c,d;
1513   PetscTruth flg;
1514   int        ierr;
1515 
1516   PetscFunctionBegin;
1517   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1518   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1519   a = matA->A; b = matA->B;
1520   c = matB->A; d = matB->B;
1521 
1522   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1523   if (flg == PETSC_TRUE) {
1524     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1525   }
1526   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatCopy_MPIAIJ"
1532 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1533 {
1534   int        ierr;
1535   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1536   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1537   PetscTruth flg;
1538 
1539   PetscFunctionBegin;
1540   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1541   if (str != SAME_NONZERO_PATTERN || !flg) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
1547     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1548   } else {
1549     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1550     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1551   }
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1557 int MatSetUpPreallocation_MPIAIJ(Mat A)
1558 {
1559   int        ierr;
1560 
1561   PetscFunctionBegin;
1562   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1567 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1568 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
1571 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1572 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1573 #endif
1574 
1575 #include "petscblaslapack.h"
1576 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatAXPY_MPIAIJ"
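/*
   Computes Y = a*X + Y. For SAME_NONZERO_PATTERN the stored values are
   combined directly with BLAS axpy; for SUBSET_NONZERO_PATTERN a mapping
   from X's off-diagonal nonzeros into Y's is built (and cached in y->xtoy);
   anything else falls back to MatAXPY_Basic().
*/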
1579 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1580 {
1581   int        ierr,one=1,i;
1582   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1583   Mat_SeqAIJ *x,*y;
1584 
1585   PetscFunctionBegin;
1586   if (str == SAME_NONZERO_PATTERN) {
1587     x = (Mat_SeqAIJ *)xx->A->data;
1588     y = (Mat_SeqAIJ *)yy->A->data;
1589     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1590     x = (Mat_SeqAIJ *)xx->B->data;
1591     y = (Mat_SeqAIJ *)yy->B->data;
1592     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1593   } else if (str == SUBSET_NONZERO_PATTERN) {
1594     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1595 
1596     x = (Mat_SeqAIJ *)xx->B->data;
1597     y = (Mat_SeqAIJ *)yy->B->data;
1598     if (y->xtoy && y->XtoY != xx->B) {
1599       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1600       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1601     }
1602     if (!y->xtoy) { /* get xtoy */
1603       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1604       y->XtoY = xx->B;
1605     }
1606     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1607   } else {
1608     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1609   }
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 /* -------------------------------------------------------------------*/
1614 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1615        MatGetRow_MPIAIJ,
1616        MatRestoreRow_MPIAIJ,
1617        MatMult_MPIAIJ,
1618        MatMultAdd_MPIAIJ,
1619        MatMultTranspose_MPIAIJ,
1620        MatMultTransposeAdd_MPIAIJ,
1621        0,
1622        0,
1623        0,
1624        0,
1625        0,
1626        0,
1627        MatRelax_MPIAIJ,
1628        MatTranspose_MPIAIJ,
1629        MatGetInfo_MPIAIJ,
1630        MatEqual_MPIAIJ,
1631        MatGetDiagonal_MPIAIJ,
1632        MatDiagonalScale_MPIAIJ,
1633        MatNorm_MPIAIJ,
1634        MatAssemblyBegin_MPIAIJ,
1635        MatAssemblyEnd_MPIAIJ,
1636        0,
1637        MatSetOption_MPIAIJ,
1638        MatZeroEntries_MPIAIJ,
1639        MatZeroRows_MPIAIJ,
1640 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1641        MatLUFactorSymbolic_MPIAIJ_TFS,
1642 #else
1643        0,
1644 #endif
1645        0,
1646        0,
1647        0,
1648        MatSetUpPreallocation_MPIAIJ,
1649        0,
1650        0,
1651        0,
1652        0,
1653        MatDuplicate_MPIAIJ,
1654        0,
1655        0,
1656        0,
1657        0,
1658        MatAXPY_MPIAIJ,
1659        MatGetSubMatrices_MPIAIJ,
1660        MatIncreaseOverlap_MPIAIJ,
1661        MatGetValues_MPIAIJ,
1662        MatCopy_MPIAIJ,
1663        MatPrintHelp_MPIAIJ,
1664        MatScale_MPIAIJ,
1665        0,
1666        0,
1667        0,
1668        MatGetBlockSize_MPIAIJ,
1669        0,
1670        0,
1671        0,
1672        0,
1673        MatFDColoringCreate_MPIAIJ,
1674        0,
1675        MatSetUnfactored_MPIAIJ,
1676        0,
1677        0,
1678        MatGetSubMatrix_MPIAIJ,
1679        MatDestroy_MPIAIJ,
1680        MatView_MPIAIJ,
1681        MatGetPetscMaps_Petsc,
1682        0,
1683        0,
1684        0,
1685        0,
1686        0,
1687        0,
1688        0,
1689        0,
1690        MatSetColoring_MPIAIJ,
1691        MatSetValuesAdic_MPIAIJ,
1692        MatSetValuesAdifor_MPIAIJ
1693 };
1694 
1695 /* ----------------------------------------------------------------------------------------*/
1696 
1697 EXTERN_C_BEGIN
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatStoreValues_MPIAIJ"
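/*
   Saves a copy of the numerical values of both local blocks so they can be
   restored later with MatRetrieveValues().
*/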
1700 int MatStoreValues_MPIAIJ(Mat mat)
1701 {
1702   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1703   int        ierr;
1704 
1705   PetscFunctionBegin;
1706   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1707   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
1710 EXTERN_C_END
1711 
1712 EXTERN_C_BEGIN
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1715 int MatRetrieveValues_MPIAIJ(Mat mat)
1716 {
1717   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1718   int        ierr;
1719 
1720   PetscFunctionBegin;
1721   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1722   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 EXTERN_C_END
1726 
1727 #include "petscpc.h"
1728 EXTERN_C_BEGIN
1729 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1730 EXTERN_C_END
1731 
1732 EXTERN_C_BEGIN
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
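/*
   Checks the preallocation arguments and creates the two local SeqAIJ
   blocks: the diagonal block is m x n, while the off-diagonal block is
   stored as m x N (its columns are compressed later, at assembly time).
*/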
1735 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1736 {
1737   Mat_MPIAIJ   *b;
1738   int          ierr,i;
1740 
1741   PetscFunctionBegin;
1742   B->preallocated = PETSC_TRUE;
1743   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1744   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1745   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1746   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1747   if (d_nnz) {
1748     for (i=0; i<B->m; i++) {
1749       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1750     }
1751   }
1752   if (o_nnz) {
1753     for (i=0; i<B->m; i++) {
1754       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1755     }
1756   }
1757   b = (Mat_MPIAIJ*)B->data;
1758 
1759   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1760   PetscLogObjectParent(B,b->A);
1761   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1762   PetscLogObjectParent(B,b->B);
1763 
1764   PetscFunctionReturn(0);
1765 }
1766 EXTERN_C_END
1767 
1768 EXTERN_C_BEGIN
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatCreate_MPIAIJ"
1771 int MatCreate_MPIAIJ(Mat B)
1772 {
1773   Mat_MPIAIJ *b;
1774   int        ierr,i,size;
1775 
1776   PetscFunctionBegin;
1777   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1778 
1779   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1780   B->data         = (void*)b;
1781   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1782   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1783   B->factor       = 0;
1784   B->assembled    = PETSC_FALSE;
1785   B->mapping      = 0;
1786 
1787   B->insertmode      = NOT_SET_VALUES;
1788   b->size            = size;
1789   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1790 
1791   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1792   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1793 
1794   /* the information in the maps duplicates the information computed below, eventually
1795      we should remove the duplicate information that is not contained in the maps */
1796   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1797   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1798 
1799   /* build local table of row and column ownerships */
1800   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1801   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1802   b->cowners = b->rowners + b->size + 2;
1803   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1804   b->rowners[0] = 0;
1805   for (i=2; i<=b->size; i++) {
1806     b->rowners[i] += b->rowners[i-1];
1807   }
1808   b->rstart = b->rowners[b->rank];
1809   b->rend   = b->rowners[b->rank+1];
1810   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1811   b->cowners[0] = 0;
1812   for (i=2; i<=b->size; i++) {
1813     b->cowners[i] += b->cowners[i-1];
1814   }
1815   b->cstart = b->cowners[b->rank];
1816   b->cend   = b->cowners[b->rank+1];
1817 
1818   /* build cache for off array entries formed */
1819   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1820   b->donotstash  = PETSC_FALSE;
1821   b->colmap      = 0;
1822   b->garray      = 0;
1823   b->roworiented = PETSC_TRUE;
1824 
1825   /* stuff used for matrix vector multiply */
1826   b->lvec      = PETSC_NULL;
1827   b->Mvctx     = PETSC_NULL;
1828 
1829   /* stuff for MatGetRow() */
1830   b->rowindices   = 0;
1831   b->rowvalues    = 0;
1832   b->getrowactive = PETSC_FALSE;
1833 
1834   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1835                                      "MatStoreValues_MPIAIJ",
1836                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1838                                      "MatRetrieveValues_MPIAIJ",
1839                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1841 				     "MatGetDiagonalBlock_MPIAIJ",
1842                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1844 				     "MatIsSymmetric_MPIAIJ",
1845 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1847 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1848 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 EXTERN_C_END
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1855 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1856 {
1857   Mat        mat;
1858   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1859   int        ierr;
1860 
1861   PetscFunctionBegin;
1862   *newmat       = 0;
1863   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1864   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1865   a    = (Mat_MPIAIJ*)mat->data;
1866   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1867   mat->factor       = matin->factor;
1868   mat->assembled    = PETSC_TRUE;
1869   mat->insertmode   = NOT_SET_VALUES;
1870   mat->preallocated = PETSC_TRUE;
1871 
1872   a->rstart       = oldmat->rstart;
1873   a->rend         = oldmat->rend;
1874   a->cstart       = oldmat->cstart;
1875   a->cend         = oldmat->cend;
1876   a->size         = oldmat->size;
1877   a->rank         = oldmat->rank;
1878   a->donotstash   = oldmat->donotstash;
1879   a->roworiented  = oldmat->roworiented;
1880   a->rowindices   = 0;
1881   a->rowvalues    = 0;
1882   a->getrowactive = PETSC_FALSE;
1883 
1884   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1885   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1886   if (oldmat->colmap) {
1887 #if defined (PETSC_USE_CTABLE)
1888     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1889 #else
1890     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1891     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1892     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1893 #endif
1894   } else a->colmap = 0;
1895   if (oldmat->garray) {
1896     int len;
1897     len  = oldmat->B->n;
1898     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1899     PetscLogObjectMemory(mat,len*sizeof(int));
1900     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1901   } else a->garray = 0;
1902 
1903   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1904   PetscLogObjectParent(mat,a->lvec);
1905   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1906   PetscLogObjectParent(mat,a->Mvctx);
1907   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1908   PetscLogObjectParent(mat,a->A);
1909   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1910   PetscLogObjectParent(mat,a->B);
1911   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1912   *newmat = mat;
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 #include "petscsys.h"
1917 
1918 EXTERN_C_BEGIN
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatLoad_MPIAIJ"
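/*
   Loads a matrix stored in PETSc binary format: process 0 reads the header,
   row lengths, column indices, and values, and ships each other processor
   its piece; the rows are split as evenly as possible across the processors.
*/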
1921 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1922 {
1923   Mat          A;
1924   PetscScalar  *vals,*svals;
1925   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1926   MPI_Status   status;
1927   int          i,nz,ierr,j,rstart,rend,fd;
1928   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1929   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1930   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1931 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
1932   PetscTruth   flag;
1933 #endif
1934 
1935   PetscFunctionBegin;
1936   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1937   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1938   if (!rank) {
1939     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1940     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1942     if (header[3] < 0) {
1943       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1944     }
1945   }
1946 
1947   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1948   M = header[1]; N = header[2];
1949   /* determine ownership of all rows */
1950   m = M/size + ((M % size) > rank);
1951   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1952   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1953   rowners[0] = 0;
1954   for (i=2; i<=size; i++) {
1955     rowners[i] += rowners[i-1];
1956   }
1957   rstart = rowners[rank];
1958   rend   = rowners[rank+1];
1959 
1960   /* distribute row lengths to all processors */
1961   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1962   offlens = ourlens + (rend-rstart);
1963   if (!rank) {
1964     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1965     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1966     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1967     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1968     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1969     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1970   } else {
1971     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1972   }
1973 
1974   if (!rank) {
1975     /* calculate the number of nonzeros on each processor */
1976     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1977     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1978     for (i=0; i<size; i++) {
1979       for (j=rowners[i]; j< rowners[i+1]; j++) {
1980         procsnz[i] += rowlengths[j];
1981       }
1982     }
1983     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1984 
1985     /* determine max buffer needed and allocate it */
1986     maxnz = 0;
1987     for (i=0; i<size; i++) {
1988       maxnz = PetscMax(maxnz,procsnz[i]);
1989     }
1990     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1991 
1992     /* read in my part of the matrix column indices  */
1993     nz   = procsnz[0];
1994     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1995     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1996 
    /* read in everyone else's and ship it off */
1998     for (i=1; i<size; i++) {
1999       nz   = procsnz[i];
2000       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2001       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2002     }
2003     ierr = PetscFree(cols);CHKERRQ(ierr);
2004   } else {
2005     /* determine buffer space needed for message */
2006     nz = 0;
2007     for (i=0; i<m; i++) {
2008       nz += ourlens[i];
2009     }
2010     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2011 
    /* receive message of column indices */
2013     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2014     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2015     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2016   }
2017 
2018   /* determine column ownership if matrix is not square */
2019   if (N != M) {
2020     n      = N/size + ((N % size) > rank);
2021     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2022     cstart = cend - n;
2023   } else {
2024     cstart = rstart;
2025     cend   = rend;
2026     n      = cend - cstart;
2027   }
2028 
2029   /* loop over local rows, determining number of off diagonal entries */
2030   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2031   jj = 0;
2032   for (i=0; i<m; i++) {
2033     for (j=0; j<ourlens[i]; j++) {
2034       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2035       jj++;
2036     }
2037   }
2038 
2039   /* create our matrix */
2040   for (i=0; i<m; i++) {
2041     ourlens[i] -= offlens[i];
2042   }
2043   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2044   A = *newmat;
2045   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2046   for (i=0; i<m; i++) {
2047     ourlens[i] += offlens[i];
2048   }
2049 
2050   if (!rank) {
2051     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2052 
2053     /* read in my part of the matrix numerical values  */
2054     nz   = procsnz[0];
2055     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2056 
2057     /* insert into matrix */
2058     jj      = rstart;
2059     smycols = mycols;
2060     svals   = vals;
2061     for (i=0; i<m; i++) {
2062       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2063       smycols += ourlens[i];
2064       svals   += ourlens[i];
2065       jj++;
2066     }
2067 
2068     /* read in other processors and ship out */
2069     for (i=1; i<size; i++) {
2070       nz   = procsnz[i];
2071       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2072       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2073     }
2074     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2075   } else {
2076     /* receive numeric values */
2077     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2078 
    /* receive message of values */
2080     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2081     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2082     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2083 
2084     /* insert into matrix */
2085     jj      = rstart;
2086     smycols = mycols;
2087     svals   = vals;
2088     for (i=0; i<m; i++) {
2089       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2090       smycols += ourlens[i];
2091       svals   += ourlens[i];
2092       jj++;
2093     }
2094   }
2095   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2096   ierr = PetscFree(vals);CHKERRQ(ierr);
2097   ierr = PetscFree(mycols);CHKERRQ(ierr);
2098   ierr = PetscFree(rowners);CHKERRQ(ierr);
2099 
2100   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2101   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2102 #if defined(PETSC_HAVE_SPOOLES)
2103   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2104   if (flag) {
2105     if (size == 1) {
2106       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2107     } else {
2108       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2109     }
2110   }
2111 #endif
2112 #if defined(PETSC_HAVE_SUPERLUDIST)
2113   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2114   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2115 #endif
2116 #if defined(PETSC_HAVE_MUMPS)
2117   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
2118   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
2119 #endif
2120   PetscFunctionReturn(0);
2121 }
2122 EXTERN_C_END
2123 
2124 #undef __FUNCT__
2125 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  matrix on each processor, and then the end result obtained by concatenating
  the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
2131 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2132 {
2133   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2134   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2135   Mat          *local,M,Mreuse;
2136   PetscScalar  *vwork,*aa;
2137   MPI_Comm     comm = mat->comm;
2138   Mat_SeqAIJ   *aij;
2139 
2140 
2141   PetscFunctionBegin;
2142   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2143   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2144 
2145   if (call ==  MAT_REUSE_MATRIX) {
2146     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2147     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2148     local = &Mreuse;
2149     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2150   } else {
2151     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2152     Mreuse = *local;
2153     ierr   = PetscFree(local);CHKERRQ(ierr);
2154   }
2155 
2156   /*
2157       m - number of local rows
2158       n - number of columns (same on all processors)
2159       rstart - first row in new global matrix generated
2160   */
2161   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2162   if (call == MAT_INITIAL_MATRIX) {
2163     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2164     ii  = aij->i;
2165     jj  = aij->j;
2166 
2167     /*
2168         Determine the number of non-zeros in the diagonal and off-diagonal
2169         portions of the matrix in order to do correct preallocation
2170     */
2171 
2172     /* first get start and end of "diagonal" columns */
2173     if (csize == PETSC_DECIDE) {
2174       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2175       if (mglobal == n) { /* square matrix */
        nlocal = m;
2177       } else {
2178         nlocal = n/size + ((n % size) > rank);
2179       }
2180     } else {
2181       nlocal = csize;
2182     }
2183     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2184     rstart = rend - nlocal;
2185     if (rank == size - 1 && rend != n) {
2186       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2187     }
2188 
2189     /* next, compute all the lengths */
2190     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2191     olens = dlens + m;
2192     for (i=0; i<m; i++) {
2193       jend = ii[i+1] - ii[i];
2194       olen = 0;
2195       dlen = 0;
2196       for (j=0; j<jend; j++) {
2197         if (*jj < rstart || *jj >= rend) olen++;
2198         else dlen++;
2199         jj++;
2200       }
2201       olens[i] = olen;
2202       dlens[i] = dlen;
2203     }
2204     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2205     ierr = PetscFree(dlens);CHKERRQ(ierr);
2206   } else {
2207     int ml,nl;
2208 
2209     M = *newmat;
2210     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2211     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2212     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2213     /*
2214          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2215        rather than the slower MatSetValues().
2216     */
2217     M->was_assembled = PETSC_TRUE;
2218     M->assembled     = PETSC_FALSE;
2219   }
2220   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2221   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2222   ii  = aij->i;
2223   jj  = aij->j;
2224   aa  = aij->a;
2225   for (i=0; i<m; i++) {
2226     row   = rstart + i;
2227     nz    = ii[i+1] - ii[i];
2228     cwork = jj;     jj += nz;
2229     vwork = aa;     aa += nz;
2230     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2231   }
2232 
2233   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2234   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2235   *newmat = M;
2236 
2237   /* save submatrix used in processor for next request */
2238   if (call ==  MAT_INITIAL_MATRIX) {
2239     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2240     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2241   }
2242 
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2248 /*@C
2249    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2250    (the default parallel PETSc format).  For good matrix assembly performance
2251    the user should preallocate the matrix storage by setting the parameters
2252    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2253    performance can be increased by more than a factor of 50.
2254 
2255    Collective on MPI_Comm
2256 
2257    Input Parameters:
2258 +  A - the matrix
2259 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2260            (same value is used for all local rows)
2261 .  d_nnz - array containing the number of nonzeros in the various rows of the
2262            DIAGONAL portion of the local submatrix (possibly different for each row)
2263            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
2265            You must leave room for the diagonal entry even if it is zero.
2266 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2267            submatrix (same value is used for all local rows).
2268 -  o_nnz - array containing the number of nonzeros in the various rows of the
2269            OFF-DIAGONAL portion of the local submatrix (possibly different for
2270            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2271            structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.
2273 
2274    The AIJ format (also called the Yale sparse matrix format or
2275    compressed row storage), is fully compatible with standard Fortran 77
2276    storage.  That is, the stored row and column indices can begin at
2277    either one (as in Fortran) or zero.  See the users manual for details.
2278 
2279    The user MUST specify either the local or global matrix dimensions
2280    (possibly both).
2281 
2282    The parallel matrix is partitioned such that the first m0 rows belong to
2283    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2285 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2292 
2293    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2294 
2295    By default, this format uses inodes (identical nodes) when possible.
2296    We search for consecutive rows with the same nonzero structure, thereby
2297    reusing matrix information to achieve increased efficiency.
2298 
2299    Options Database Keys:
2300 +  -mat_aij_no_inode  - Do not use inodes
2301 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2302 -  -mat_aij_oneindex - Internally use indexing starting at 1
2303         rather than 0.  Note that when calling MatSetValues(),
2304         the user still MUST index entries starting at 0!
2305 
2306    Example usage:
2307 
2308    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2310    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2311    as follows:
2312 
2313 .vb
2314             1  2  0  |  0  3  0  |  0  4
2315     Proc0   0  5  6  |  7  0  0  |  8  0
2316             9  0 10  | 11  0  0  | 12  0
2317     -------------------------------------
2318            13  0 14  | 15 16 17  |  0  0
2319     Proc1   0 18  0  | 19 20 21  |  0  0
2320             0  0  0  | 22 23  0  | 24  0
2321     -------------------------------------
2322     Proc2  25 26 27  |  0  0 28  | 29  0
2323            30  0  0  | 31 32 33  |  0 34
2324 .ve
2325 
2326    This can be represented as a collection of submatrices as:
2327 
2328 .vb
2329       A B C
2330       D E F
2331       G H I
2332 .ve
2333 
2334    Where the submatrices A,B,C are owned by proc0, D,E,F are
2335    owned by proc1, G,H,I are owned by proc2.
2336 
2337    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2338    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2339    The 'M','N' parameters are 8,8, and have the same values on all procs.
2340 
2341    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2342    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2343    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2347 
2348    When d_nz, o_nz parameters are specified, d_nz storage elements are
2349    allocated for every row of the local diagonal submatrix, and o_nz
2350    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2353    In this case, the values of d_nz,o_nz are:
2354 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
2358 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
2363 
2364    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2366    In the above case the values for d_nnz,o_nnz are:
2367 .vb
2368      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2369      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2370      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2371 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
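
   A typical call sequence, sketched with placeholder preallocation values
   (5 and 2 are illustrative, not recommendations), might look like:
.vb
     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve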
2374 
2375    Level: intermediate
2376 
2377 .keywords: matrix, aij, compressed row, sparse, parallel
2378 
2379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2380 @*/
2381 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2382 {
2383   int ierr,(*f)(Mat,int,int*,int,int*);
2384 
2385   PetscFunctionBegin;
2386   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2387   if (f) {
2388     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2389   }
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatCreateMPIAIJ"
2395 /*@C
2396    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2397    (the default parallel PETSc format).  For good matrix assembly performance
2398    the user should preallocate the matrix storage by setting the parameters
2399    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2400    performance can be increased by more than a factor of 50.
2401 
2402    Collective on MPI_Comm
2403 
   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2414 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2415            (same value is used for all local rows)
2416 .  d_nnz - array containing the number of nonzeros in the various rows of the
2417            DIAGONAL portion of the local submatrix (possibly different for each row)
2418            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
2420            You must leave room for the diagonal entry even if it is zero.
2421 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2422            submatrix (same value is used for all local rows).
2423 -  o_nnz - array containing the number of nonzeros in the various rows of the
2424            OFF-DIAGONAL portion of the local submatrix (possibly different for
2425            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2426            structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.
2428 
2429    Output Parameter:
2430 .  A - the matrix
2431 
2432    Notes:
2433    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2434    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2435    storage requirements for this matrix.
2436 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
2439    that argument.
2440 
2441    The AIJ format (also called the Yale sparse matrix format or
2442    compressed row storage), is fully compatible with standard Fortran 77
2443    storage.  That is, the stored row and column indices can begin at
2444    either one (as in Fortran) or zero.  See the users manual for details.
2445 
2446    The user MUST specify either the local or global matrix dimensions
2447    (possibly both).
2448 
2449    The parallel matrix is partitioned such that the first m0 rows belong to
2450    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2452 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2459 
2460    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2461 
2462    By default, this format uses inodes (identical nodes) when possible.
2463    We search for consecutive rows with the same nonzero structure, thereby
2464    reusing matrix information to achieve increased efficiency.
2465 
2466    Options Database Keys:
2467 +  -mat_aij_no_inode  - Do not use inodes
2468 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2469 -  -mat_aij_oneindex - Internally use indexing starting at 1
2470         rather than 0.  Note that when calling MatSetValues(),
2471         the user still MUST index entries starting at 0!
2472 
2473 
2474    Example usage:
2475 
2476    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2478    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2479    as follows:
2480 
2481 .vb
2482             1  2  0  |  0  3  0  |  0  4
2483     Proc0   0  5  6  |  7  0  0  |  8  0
2484             9  0 10  | 11  0  0  | 12  0
2485     -------------------------------------
2486            13  0 14  | 15 16 17  |  0  0
2487     Proc1   0 18  0  | 19 20 21  |  0  0
2488             0  0  0  | 22 23  0  | 24  0
2489     -------------------------------------
2490     Proc2  25 26 27  |  0  0 28  | 29  0
2491            30  0  0  | 31 32 33  |  0 34
2492 .ve
2493 
2494    This can be represented as a collection of submatrices as:
2495 
2496 .vb
2497       A B C
2498       D E F
2499       G H I
2500 .ve
2501 
2502    Where the submatrices A,B,C are owned by proc0, D,E,F are
2503    owned by proc1, G,H,I are owned by proc2.
2504 
2505    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2506    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2507    The 'M','N' parameters are 8,8, and have the same values on all procs.
2508 
2509    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2510    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2511    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2515 
2516    When d_nz, o_nz parameters are specified, d_nz storage elements are
2517    allocated for every row of the local diagonal submatrix, and o_nz
2518    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2521    In this case, the values of d_nz,o_nz are:
2522 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
2526 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
2531 
2532    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2534    In the above case the values for d_nnz,o_nnz are:
2535 .vb
2536      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2537      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2538      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2539 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
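
   A minimal illustrative call (the nonzero counts 5 and 2 are placeholders,
   not recommendations):
.vb
     Mat A;
     ierr = MatCreateMPIAIJ(comm,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve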
2542 
2543    Level: intermediate
2544 
2545 .keywords: matrix, aij, compressed row, sparse, parallel
2546 
2547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2548 @*/
2549 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2550 {
2551   int ierr,size;
2552 
2553   PetscFunctionBegin;
2554   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2555   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2556   if (size > 1) {
2557     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2558     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2559   } else {
2560     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2561     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2562   }
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 #undef __FUNCT__
2567 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
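/*
   Returns the local diagonal and off-diagonal SeqAIJ blocks of the parallel
   matrix, along with the map (garray) from the off-diagonal block's
   compressed column numbers to global column numbers.
*/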
2568 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2569 {
2570   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2571   PetscFunctionBegin;
2572   *Ad     = a->A;
2573   *Ao     = a->B;
2574   *colmap = a->garray;
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 #undef __FUNCT__
2579 #define __FUNCT__ "MatSetColoring_MPIAIJ"
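/*
   Propagates a coloring to the two local blocks. For IS_COLORING_LOCAL the
   colors of all columns are gathered and those of the ghost columns are
   picked out with garray; for IS_COLORING_GHOSTED the colors are found via
   ISGlobalToLocalMappingApply() on the matrix's mapping.
*/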
2580 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2581 {
2582   int        ierr,i;
2583   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2584 
2585   PetscFunctionBegin;
2586   if (coloring->ctype == IS_COLORING_LOCAL) {
2587     ISColoringValue *allcolors,*colors;
2588     ISColoring      ocoloring;
2589 
2590     /* set coloring for diagonal portion */
2591     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2592 
2593     /* set coloring for off-diagonal portion */
2594     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2595     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2596     for (i=0; i<a->B->n; i++) {
2597       colors[i] = allcolors[a->garray[i]];
2598     }
2599     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2600     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2601     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2602     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2603   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2604     ISColoringValue *colors;
2605     int             *larray;
2606     ISColoring      ocoloring;
2607 
2608     /* set coloring for diagonal portion */
2609     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2610     for (i=0; i<a->A->n; i++) {
2611       larray[i] = i + a->cstart;
2612     }
2613     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2614     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2615     for (i=0; i<a->A->n; i++) {
2616       colors[i] = coloring->colors[larray[i]];
2617     }
2618     ierr = PetscFree(larray);CHKERRQ(ierr);
2619     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2620     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2621     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2622 
2623     /* set coloring for off-diagonal portion */
2624     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2625     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2626     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2627     for (i=0; i<a->B->n; i++) {
2628       colors[i] = coloring->colors[larray[i]];
2629     }
2630     ierr = PetscFree(larray);CHKERRQ(ierr);
2631     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2632     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2633     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2634   } else {
2635     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2636   }
2637 
2638   PetscFunctionReturn(0);
2639 }
2640 
2641 #undef __FUNCT__
2642 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2643 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2644 {
2645   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2646   int        ierr;
2647 
2648   PetscFunctionBegin;
2649   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2650   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2651   PetscFunctionReturn(0);
2652 }
2653 
2654 #undef __FUNCT__
2655 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2656 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2657 {
2658   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2659   int        ierr;
2660 
2661   PetscFunctionBegin;
2662   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2663   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatMerge"
2669 /*@C
      MatMerge - Creates a single large parallel PETSc matrix by concatenating the
                 sequential matrices from each processor
2672 
2673     Collective on MPI_Comm
2674 
2675    Input Parameters:
+    comm - the communicator the parallel matrix will live on
-    inmat - the input sequential matrix (one per processor)
2678 
2679    Output Parameter:
2680 .    outmat - the parallel matrix generated
2681 
2682     Level: advanced
2683 
   Notes: The number of columns of the matrix on EACH of the separate processors
      MUST be the same. The input matrix is destroyed by this routine.
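
   A sketch of a typical use (the local matrix seq is assumed to be a fully
   assembled SeqAIJ matrix):
.vb
     Mat seq,par;
     /* ... create and assemble the local sequential matrix seq ... */
     ierr = MatMerge(PETSC_COMM_WORLD,seq,&par);CHKERRQ(ierr);
     /* seq has been destroyed by MatMerge() */
.ve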
2686 
2687 @*/
2688 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2689 {
2690   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2691   PetscScalar *values;
2692   PetscMap    columnmap,rowmap;
2693 
2694   PetscFunctionBegin;
2695 
2696   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2697 
2698   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2699   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2700   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2701   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2702   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2703   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2704 
2705   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2706   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2707   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2708   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2709   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2710 
2711   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2712   for (i=0;i<m;i++) {
2713     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2714     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2715     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2716   }
2717   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2718   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2719 
2720   for (i=0;i<m;i++) {
2721     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2722     I    = i + rstart;
2723     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2724     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2725   }
2726   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2727   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2728   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2729 
2730   PetscFunctionReturn(0);
2731 }
2732 
2733 #undef __FUNCT__
2734 #define __FUNCT__ "MatFileSplit"
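/*
   Writes each processor's local rows as a separate sequential matrix in
   PETSc binary format to the file outfile.<rank>.
*/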
2735 int MatFileSplit(Mat A,char *outfile)
2736 {
2737   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2738   PetscViewer out;
2739   char        *name;
2740   Mat         B;
2741   PetscScalar *values;
2742 
2743   PetscFunctionBegin;
2744 
2745   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2746   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2747   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2748   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2749   for (i=0;i<m;i++) {
2750     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2751     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2752     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2753   }
2754   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2755   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756 
2757   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2758   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2759   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2760   sprintf(name,"%s.%d",outfile,rank);
2761   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
2763   ierr = MatView(B,out);CHKERRQ(ierr);
2764   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2765   ierr = MatDestroy(B);CHKERRQ(ierr);
2766   PetscFunctionReturn(0);
2767 }
2768