xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 448de63ea3ecde4d26bc0435d1cb2a01d7671c6c)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order N integer array), but access is fast.
20 */
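/*
   Illustrative sketch (hypothetical numbers, not part of the original source):
   if this process references off-diagonal global columns garray[] = {3,7,12},
   then the array variant stores colmap[3]=1, colmap[7]=2, colmap[12]=3 and
   leaves every other entry 0, so a value of 0 means "column not present";
   the CTABLE variant stores the same pairs but keyed as garray[i]+1 so that
   neither key nor value is ever zero.
*/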
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
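/*
   The two macros below inline the inner kernel of MatSetValues for the
   diagonal (A) and off-diagonal (B) SeqAIJ blocks: a short binary search
   narrows the candidate range in the (sorted) row, a linear scan locates the
   column, and an existing entry is added to or overwritten in place.  When a
   row is full the whole i/j/a storage is reallocated with room for CHUNKSIZE
   extra entries in that row; a sketch of the growth, using the A-block names:

       new_nz = ai[am] + CHUNKSIZE;
       len    = new_nz*(sizeof(int)+sizeof(PetscScalar)) + (am+1)*sizeof(int);

   so each overflow costs one PetscMalloc, several PetscMemcpy calls, and a
   CHUNKSIZE shift of the row starts that follow the insertion row.
*/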
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some Variables required in the macro */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift=0;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
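/*
   Caller-side sketch (assumed usage, hypothetical names, not taken from this
   file): values are always given with global indices; locally owned rows are
   routed into aij->A or aij->B above, everything else is placed in the stash
   and communicated during assembly.

       int         row,col;        (hypothetical global row/column numbers)
       PetscScalar v = 1.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
*/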
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
289     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
294         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
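/*
   The MPI_Allreduce above guarantees every process is using the same
   InsertMode; a sketch of how a caller switches modes safely (assumed usage,
   not from this file) is to flush-assemble between the two phases:

       ierr = MatSetValues(mat,...,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatSetValues(mat,...,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/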
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
353   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_MUMPS)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled; if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if the nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled, so we can skip this step
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412 #if defined(PETSC_HAVE_MUMPS)
413   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
414   if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
415 #endif
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
421 int MatZeroEntries_MPIAIJ(Mat A)
422 {
423   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
424   int        ierr;
425 
426   PetscFunctionBegin;
427   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
428   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatZeroRows_MPIAIJ"
434 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
435 {
436   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
437   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
438   int            *nprocs,j,idx,nsends,row;
439   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
440   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
441   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
442   MPI_Comm       comm = A->comm;
443   MPI_Request    *send_waits,*recv_waits;
444   MPI_Status     recv_status,*send_status;
445   IS             istmp;
446   PetscTruth     found;
447 
448   PetscFunctionBegin;
449   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
450   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
451 
452   /*  first count number of contributors to each processor */
453   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
454   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
455   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
456   for (i=0; i<N; i++) {
457     idx = rows[i];
458     found = PETSC_FALSE;
459     for (j=0; j<size; j++) {
460       if (idx >= owners[j] && idx < owners[j+1]) {
461         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
462       }
463     }
464     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
465   }
466   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
467 
468   /* inform other processors of number of messages and max length*/
469   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
470 
471   /* post receives:   */
472   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
474   for (i=0; i<nrecvs; i++) {
475     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
476   }
477 
478   /* do sends:
479       1) starts[i] gives the starting index in svalues for stuff going to
480          the ith processor
481   */
482   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
484   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
485   starts[0] = 0;
486   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
487   for (i=0; i<N; i++) {
488     svalues[starts[owner[i]]++] = rows[i];
489   }
490   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
491 
492   starts[0] = 0;
493   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
494   count = 0;
495   for (i=0; i<size; i++) {
496     if (nprocs[2*i+1]) {
497       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
498     }
499   }
500   ierr = PetscFree(starts);CHKERRQ(ierr);
501 
502   base = owners[rank];
503 
504   /*  wait on receives */
505   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
506   source = lens + nrecvs;
507   count  = nrecvs; slen = 0;
508   while (count) {
509     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
510     /* unpack receives into our local space */
511     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
512     source[imdex]  = recv_status.MPI_SOURCE;
513     lens[imdex]    = n;
514     slen          += n;
515     count--;
516   }
517   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
518 
519   /* move the received data into the local row list */
520   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
521   count = 0;
522   for (i=0; i<nrecvs; i++) {
523     values = rvalues + i*nmax;
524     for (j=0; j<lens[i]; j++) {
525       lrows[count++] = values[j] - base;
526     }
527   }
528   ierr = PetscFree(rvalues);CHKERRQ(ierr);
529   ierr = PetscFree(lens);CHKERRQ(ierr);
530   ierr = PetscFree(owner);CHKERRQ(ierr);
531   ierr = PetscFree(nprocs);CHKERRQ(ierr);
532 
533   /* actually zap the local rows */
534   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
535   PetscLogObjectParent(A,istmp);
536 
537   /*
538         Zero the required rows. If the "diagonal block" of the matrix
539      is square and the user wishes to set the diagonal we use separate
540      code so that MatSetValues() is not called for each diagonal entry,
541      which would allocate new memory with many mallocs and slow things down.
542 
543        Contributed by: Mathew Knepley
544   */
545   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
546   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
547   if (diag && (l->A->M == l->A->N)) {
548     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
549   } else if (diag) {
550     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
551     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
552       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
553 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
554     }
555     for (i = 0; i < slen; i++) {
556       row  = lrows[i] + rstart;
557       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
558     }
559     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
560     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
561   } else {
562     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
563   }
564   ierr = ISDestroy(istmp);CHKERRQ(ierr);
565   ierr = PetscFree(lrows);CHKERRQ(ierr);
566 
567   /* wait on sends */
568   if (nsends) {
569     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
570     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
571     ierr = PetscFree(send_status);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(send_waits);CHKERRQ(ierr);
574   ierr = PetscFree(svalues);CHKERRQ(ierr);
575 
576   PetscFunctionReturn(0);
577 }
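/*
   Caller-side sketch for the routine above (assumed usage; nlocal and
   globalrows are hypothetical names, rows are given by global number and may
   be owned by any process; pass PETSC_NULL for diag to zero the diagonal
   entries along with the rest of each row):

       IS          rows;
       PetscScalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,globalrows,&rows);CHKERRQ(ierr);
       ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);
       ierr = ISDestroy(rows);CHKERRQ(ierr);
*/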
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatMult_MPIAIJ"
581 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
582 {
583   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
584   int        ierr,nt;
585 
586   PetscFunctionBegin;
587   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
588   if (nt != A->n) {
589     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
590   }
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
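/*
   In the multiply above the result is assembled (loosely written) as

       yy = A_diag*xx_local + B_offdiag*lvec

   where lvec holds the needed off-process entries of xx; the scatter that
   fills lvec is started before and completed after the diagonal-block
   product, so the communication overlaps the local computation.
*/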
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultAdd_MPIAIJ"
600 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
607   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
608   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
609   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
615 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
616 {
617   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
618   int        ierr;
619 
620   PetscFunctionBegin;
621   /* do nondiagonal part */
622   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
623   /* send it on its way */
624   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
625   /* do local part */
626   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
627   /* receive remote parts: note this assumes the values are not actually */
628   /* inserted in yy until the next line, which is true for my implementation*/
629   /* but is not perhaps always true. */
630   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
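/*
   The transpose multiply above reverses the communication pattern: each
   process computes B_offdiag^T*xx into lvec and starts a reverse scatter
   that will ADD those contributions into the owners' parts of yy; the local
   product A_diag^T*xx is formed while that scatter is in flight, and the
   scatter end then accumulates the remote pieces on top of it.
*/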
633 
634 EXTERN_C_BEGIN
635 #undef __FUNCT__
636 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
637 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
638 {
639   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
640   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
641   IS         Me,Notme;
642   int        M,N,first,last,*notme,i, ierr;
643 
644   PetscFunctionBegin;
645 
646   /* Easy test: symmetric diagonal block */
647   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
648   ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
649   if (!*f) PetscFunctionReturn(0);
650 
651   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
652   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
653   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
654   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
655   for (i=0; i<first; i++) notme[i] = i;
656   for (i=last; i<M; i++) notme[i-last+first] = i;
657   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
658   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
659   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
660   Aoff = Aoffs[0];
661   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
662   Boff = Boffs[0];
663   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
664   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
665   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
666   ierr = ISDestroy(Me); CHKERRQ(ierr);
667   ierr = ISDestroy(Notme); CHKERRQ(ierr);
668 
669   PetscFunctionReturn(0);
670 }
671 EXTERN_C_END
672 
673 #undef __FUNCT__
674 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
675 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
676 {
677   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
678   int        ierr;
679 
680   PetscFunctionBegin;
681   /* do nondiagonal part */
682   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
683   /* send it on its way */
684   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
685   /* do local part */
686   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
687   /* receive remote parts: note this assumes the values are not actually */
688   /* inserted in zz until the next line, which is true for my implementation */
689   /* but is not perhaps always true. */
690   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
691   PetscFunctionReturn(0);
692 }
693 
694 /*
695   This only works correctly for square matrices where the subblock A->A is the
696    diagonal block
697 */
698 #undef __FUNCT__
699 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
700 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
701 {
702   int        ierr;
703   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
704 
705   PetscFunctionBegin;
706   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
707   if (a->rstart != a->cstart || a->rend != a->cend) {
708     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
709   }
710   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatScale_MPIAIJ"
716 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
717 {
718   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
719   int        ierr;
720 
721   PetscFunctionBegin;
722   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
723   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatDestroy_MPIAIJ"
729 int MatDestroy_MPIAIJ(Mat mat)
730 {
731   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
732   int        ierr;
733 
734   PetscFunctionBegin;
735 #if defined(PETSC_USE_LOG)
736   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
737 #endif
738   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
739   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
740   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
741   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
742 #if defined (PETSC_USE_CTABLE)
743   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
744 #else
745   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
746 #endif
747   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
748   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
749   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
750   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
751   ierr = PetscFree(aij);CHKERRQ(ierr);
752   PetscFunctionReturn(0);
753 }
754 
755 extern int MatFactorInfo_MUMPS(Mat,PetscViewer);
756 
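/*
   The binary viewer below writes, in order: a four-int header
   (MAT_FILE_COOKIE, global rows M, global cols N, total nonzeros summed over
   all processes), then every row length, then the global column indices, and
   finally the numerical values; rank 0 receives each other rank's piece in
   turn and streams it to the file descriptor.
*/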
757 #undef __FUNCT__
758 #define __FUNCT__ "MatView_MPIAIJ_Binary"
759 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
760 {
761   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
762   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
763   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
764   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
765   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
766   PetscScalar       *column_values;
767 
768   PetscFunctionBegin;
769   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
770   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
771   nz   = A->nz + B->nz;
772   if (rank == 0) {
773     header[0] = MAT_FILE_COOKIE;
774     header[1] = mat->M;
775     header[2] = mat->N;
776     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
777     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
778     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
779     /* get largest number of rows any processor has */
780     rlen = mat->m;
781     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
782     for (i=1; i<size; i++) {
783       rlen = PetscMax(rlen,range[i+1] - range[i]);
784     }
785   } else {
786     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
787     rlen = mat->m;
788   }
789 
790   /* load up the local row counts */
791   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
792   for (i=0; i<mat->m; i++) {
793     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
794   }
795 
796   /* store the row lengths to the file */
797   if (rank == 0) {
798     MPI_Status status;
799     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
800     for (i=1; i<size; i++) {
801       rlen = range[i+1] - range[i];
802       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
803       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
804     }
805   } else {
806     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
807   }
808   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
809 
810   /* load up the local column indices */
811   nzmax = nz; /* 0th processor needs space for what the largest processor needs */
812   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
813   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
814   cnt  = 0;
815   for (i=0; i<mat->m; i++) {
816     for (j=B->i[i]; j<B->i[i+1]; j++) {
817       if ( (col = garray[B->j[j]]) > cstart) break;
818       column_indices[cnt++] = col;
819     }
820     for (k=A->i[i]; k<A->i[i+1]; k++) {
821       column_indices[cnt++] = A->j[k] + cstart;
822     }
823     for (; j<B->i[i+1]; j++) {
824       column_indices[cnt++] = garray[B->j[j]];
825     }
826   }
827   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
828 
829   /* store the column indices to the file */
830   if (rank == 0) {
831     MPI_Status status;
832     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
833     for (i=1; i<size; i++) {
834       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
835       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
836       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
837       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
838     }
839   } else {
840     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
841     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
842   }
843   ierr = PetscFree(column_indices);CHKERRQ(ierr);
844 
845   /* load up the local column values */
846   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
847   cnt  = 0;
848   for (i=0; i<mat->m; i++) {
849     for (j=B->i[i]; j<B->i[i+1]; j++) {
850       if ( garray[B->j[j]] > cstart) break;
851       column_values[cnt++] = B->a[j];
852     }
853     for (k=A->i[i]; k<A->i[i+1]; k++) {
854       column_values[cnt++] = A->a[k];
855     }
856     for (; j<B->i[i+1]; j++) {
857       column_values[cnt++] = B->a[j];
858     }
859   }
860   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
861 
862   /* store the column values to the file */
863   if (rank == 0) {
864     MPI_Status status;
865     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
866     for (i=1; i<size; i++) {
867       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
868       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
869       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
870       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
871     }
872   } else {
873     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
874     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
875   }
876   ierr = PetscFree(column_values);CHKERRQ(ierr);
877   PetscFunctionReturn(0);
878 }
879 
880 #undef __FUNCT__
881 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
882 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
883 {
884   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
885   int               ierr,rank = aij->rank,size = aij->size;
886   PetscTruth        isdraw,isascii,flg,isbinary;
887   PetscViewer       sviewer;
888   PetscViewerFormat format;
889 
890   PetscFunctionBegin;
891   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
893   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
894   if (isascii) {
895     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
896     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
897       MatInfo info;
898       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
899       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
901       if (flg) {
902         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
903 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
904       } else {
905         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
906 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
907       }
908       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
909       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
910       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
911       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
912       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
913       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
914       PetscFunctionReturn(0);
915     } else if (format == PETSC_VIEWER_ASCII_INFO) {
916       PetscFunctionReturn(0);
917     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
918 #if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE)
919       ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr);
920 #endif
921       PetscFunctionReturn(0);
922     }
923   } else if (isbinary) {
924     if (size == 1) {
925       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
926       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
927     } else {
928       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
929     }
930     PetscFunctionReturn(0);
931   } else if (isdraw) {
932     PetscDraw  draw;
933     PetscTruth isnull;
934     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
935     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
936   }
937 
938   if (size == 1) {
939     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
940     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
941   } else {
942     /* assemble the entire matrix onto the first processor. */
943     Mat         A;
944     Mat_SeqAIJ *Aloc;
945     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
946     PetscScalar *a;
947 
948     if (!rank) {
949       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
950     } else {
951       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
952     }
953     PetscLogObjectParent(mat,A);
954 
955     /* copy over the A part */
956     Aloc = (Mat_SeqAIJ*)aij->A->data;
957     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
958     row = aij->rstart;
959     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
960     for (i=0; i<m; i++) {
961       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
962       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
963     }
964     aj = Aloc->j;
965     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
966 
967     /* copy over the B part */
968     Aloc = (Mat_SeqAIJ*)aij->B->data;
969     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
970     row  = aij->rstart;
971     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
972     ct   = cols;
973     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
974     for (i=0; i<m; i++) {
975       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
976       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
977     }
978     ierr = PetscFree(ct);CHKERRQ(ierr);
979     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
980     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981     /*
982        Everyone has to make this call to draw the matrix since the graphics waits are
983        synchronized across all processors that share the PetscDraw object
984     */
985     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
986     if (!rank) {
987       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
988       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
989     }
990     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
991     ierr = MatDestroy(A);CHKERRQ(ierr);
992   }
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatView_MPIAIJ"
998 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
999 {
1000   int        ierr;
1001   PetscTruth isascii,isdraw,issocket,isbinary;
1002 
1003   PetscFunctionBegin;
1004   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1005   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1006   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1007   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1008   if (isascii || isdraw || isbinary || issocket) {
1009     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1010   } else {
1011     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1012   }
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatRelax_MPIAIJ"
1020 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1021 {
1022   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1023   int          ierr;
1024   Vec          bb1;
1025   PetscScalar  mone=-1.0;
1026 
1027   PetscFunctionBegin;
1028   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1029 
1030   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1031 
1032   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1033     if (flag & SOR_ZERO_INITIAL_GUESS) {
1034       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1035       its--;
1036     }
1037 
1038     while (its--) {
1039       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1040       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1041 
1042       /* update rhs: bb1 = bb - B*x */
1043       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1044       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1045 
1046       /* local sweep */
1047       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1048       CHKERRQ(ierr);
1049     }
1050   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1051     if (flag & SOR_ZERO_INITIAL_GUESS) {
1052       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1053       its--;
1054     }
1055     while (its--) {
1056       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1057       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1058 
1059       /* update rhs: bb1 = bb - B*x */
1060       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1061       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1062 
1063       /* local sweep */
1064       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1065       CHKERRQ(ierr);
1066     }
1067   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1068     if (flag & SOR_ZERO_INITIAL_GUESS) {
1069       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1070       its--;
1071     }
1072     while (its--) {
1073       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1074       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1075 
1076       /* update rhs: bb1 = bb - B*x */
1077       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1078       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1079 
1080       /* local sweep */
1081       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1082       CHKERRQ(ierr);
1083     }
1084   } else {
1085     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1086   }
1087 
1088   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
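/*
   Each "local" sweep above relaxes only the diagonal block, with a right-hand
   side corrected for the off-process coupling, roughly

       bb1 = bb - B_offdiag*lvec,   then relax A_diag against bb1,

   which is why lvec is scaled by -1 before the multadd; a true parallel
   (global ordering) SOR is not supported and errors out.
*/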
1091 
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1094 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1095 {
1096   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1097   Mat        A = mat->A,B = mat->B;
1098   int        ierr;
1099   PetscReal  isend[5],irecv[5];
1100 
1101   PetscFunctionBegin;
1102   info->block_size     = 1.0;
1103   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1104   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1105   isend[3] = info->memory;  isend[4] = info->mallocs;
1106   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1107   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1108   isend[3] += info->memory;  isend[4] += info->mallocs;
1109   if (flag == MAT_LOCAL) {
1110     info->nz_used      = isend[0];
1111     info->nz_allocated = isend[1];
1112     info->nz_unneeded  = isend[2];
1113     info->memory       = isend[3];
1114     info->mallocs      = isend[4];
1115   } else if (flag == MAT_GLOBAL_MAX) {
1116     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1117     info->nz_used      = irecv[0];
1118     info->nz_allocated = irecv[1];
1119     info->nz_unneeded  = irecv[2];
1120     info->memory       = irecv[3];
1121     info->mallocs      = irecv[4];
1122   } else if (flag == MAT_GLOBAL_SUM) {
1123     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1124     info->nz_used      = irecv[0];
1125     info->nz_allocated = irecv[1];
1126     info->nz_unneeded  = irecv[2];
1127     info->memory       = irecv[3];
1128     info->mallocs      = irecv[4];
1129   }
1130   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1131   info->fill_ratio_needed = 0;
1132   info->factor_mallocs    = 0;
1133   info->rows_global       = (double)matin->M;
1134   info->columns_global    = (double)matin->N;
1135   info->rows_local        = (double)matin->m;
1136   info->columns_local     = (double)matin->N;
1137 
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatSetOption_MPIAIJ"
1143 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1144 {
1145   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1146   int        ierr;
1147 
1148   PetscFunctionBegin;
1149   switch (op) {
1150   case MAT_NO_NEW_NONZERO_LOCATIONS:
1151   case MAT_YES_NEW_NONZERO_LOCATIONS:
1152   case MAT_COLUMNS_UNSORTED:
1153   case MAT_COLUMNS_SORTED:
1154   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1155   case MAT_KEEP_ZEROED_ROWS:
1156   case MAT_NEW_NONZERO_LOCATION_ERR:
1157   case MAT_USE_INODES:
1158   case MAT_DO_NOT_USE_INODES:
1159   case MAT_IGNORE_ZERO_ENTRIES:
1160     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1161     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1162     break;
1163   case MAT_ROW_ORIENTED:
1164     a->roworiented = PETSC_TRUE;
1165     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1166     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1167     break;
1168   case MAT_ROWS_SORTED:
1169   case MAT_ROWS_UNSORTED:
1170   case MAT_YES_NEW_DIAGONALS:
1171     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1172     break;
1173   case MAT_COLUMN_ORIENTED:
1174     a->roworiented = PETSC_FALSE;
1175     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1176     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1177     break;
1178   case MAT_IGNORE_OFF_PROC_ENTRIES:
1179     a->donotstash = PETSC_TRUE;
1180     break;
1181   case MAT_NO_NEW_DIAGONALS:
1182     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1183   default:
1184     SETERRQ(PETSC_ERR_SUP,"unknown option");
1185   }
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatGetRow_MPIAIJ"
1191 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1192 {
1193   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1194   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1195   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1196   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1197   int          *cmap,*idx_p;
1198 
1199   PetscFunctionBegin;
1200   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1201   mat->getrowactive = PETSC_TRUE;
1202 
1203   if (!mat->rowvalues && (idx || v)) {
1204     /*
1205         allocate enough space to hold information from the longest row.
1206     */
1207     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1208     int     max = 1,tmp;
1209     for (i=0; i<matin->m; i++) {
1210       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1211       if (max < tmp) { max = tmp; }
1212     }
1213     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1214     mat->rowindices = (int*)(mat->rowvalues + max);
1215   }
1216 
1217   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1218   lrow = row - rstart;
1219 
1220   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1221   if (!v)   {pvA = 0; pvB = 0;}
1222   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1223   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1224   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1225   nztot = nzA + nzB;
1226 
1227   cmap  = mat->garray;
1228   if (v  || idx) {
1229     if (nztot) {
1230       /* Sort by increasing column numbers, assuming A and B already sorted */
1231       int imark = -1;
1232       if (v) {
1233         *v = v_p = mat->rowvalues;
1234         for (i=0; i<nzB; i++) {
1235           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1236           else break;
1237         }
1238         imark = i;
1239         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1240         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1241       }
1242       if (idx) {
1243         *idx = idx_p = mat->rowindices;
1244         if (imark > -1) {
1245           for (i=0; i<imark; i++) {
1246             idx_p[i] = cmap[cworkB[i]];
1247           }
1248         } else {
1249           for (i=0; i<nzB; i++) {
1250             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1251             else break;
1252           }
1253           imark = i;
1254         }
1255         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1256         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1257       }
1258     } else {
1259       if (idx) *idx = 0;
1260       if (v)   *v   = 0;
1261     }
1262   }
1263   *nz = nztot;
1264   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1265   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
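/*
   Caller-side sketch for the routine above (assumed usage; grow is a
   hypothetical locally owned global row number, and each MatGetRow must be
   matched by a MatRestoreRow before the next row is requested):

       int         nz,*cols;
       PetscScalar *vals;
       ierr = MatGetRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);
           ... cols[] hold global column numbers merged from the A and B blocks ...
       ierr = MatRestoreRow(mat,grow,&nz,&cols,&vals);CHKERRQ(ierr);
*/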
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1271 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1272 {
1273   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1274 
1275   PetscFunctionBegin;
1276   if (aij->getrowactive == PETSC_FALSE) {
1277     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1278   }
1279   aij->getrowactive = PETSC_FALSE;
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatNorm_MPIAIJ"
1285 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1286 {
1287   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1288   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1289   int          ierr,i,j,cstart = aij->cstart;
1290   PetscReal    sum = 0.0;
1291   PetscScalar  *v;
1292 
1293   PetscFunctionBegin;
1294   if (aij->size == 1) {
1295     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1296   } else {
1297     if (type == NORM_FROBENIUS) {
1298       v = amat->a;
1299       for (i=0; i<amat->nz; i++) {
1300 #if defined(PETSC_USE_COMPLEX)
1301         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1302 #else
1303         sum += (*v)*(*v); v++;
1304 #endif
1305       }
1306       v = bmat->a;
1307       for (i=0; i<bmat->nz; i++) {
1308 #if defined(PETSC_USE_COMPLEX)
1309         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1310 #else
1311         sum += (*v)*(*v); v++;
1312 #endif
1313       }
1314       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1315       *norm = sqrt(*norm);
1316     } else if (type == NORM_1) { /* max column norm */
1317       PetscReal *tmp,*tmp2;
1318       int    *jj,*garray = aij->garray;
1319       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1320       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1321       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1322       *norm = 0.0;
1323       v = amat->a; jj = amat->j;
1324       for (j=0; j<amat->nz; j++) {
1325         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1326       }
1327       v = bmat->a; jj = bmat->j;
1328       for (j=0; j<bmat->nz; j++) {
1329         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1330       }
1331       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1332       for (j=0; j<mat->N; j++) {
1333         if (tmp2[j] > *norm) *norm = tmp2[j];
1334       }
1335       ierr = PetscFree(tmp);CHKERRQ(ierr);
1336       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1337     } else if (type == NORM_INFINITY) { /* max row norm */
1338       PetscReal ntemp = 0.0;
1339       for (j=0; j<aij->A->m; j++) {
1340         v = amat->a + amat->i[j];
1341         sum = 0.0;
1342         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1343           sum += PetscAbsScalar(*v); v++;
1344         }
1345         v = bmat->a + bmat->i[j];
1346         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1347           sum += PetscAbsScalar(*v); v++;
1348         }
1349         if (sum > ntemp) ntemp = sum;
1350       }
1351       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1352     } else {
1353       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1354     }
1355   }
1356   PetscFunctionReturn(0);
1357 }
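/*
   For reference, the norms computed above are

       NORM_FROBENIUS:  sqrt(sum_ij |a_ij|^2)   summed across all processes
       NORM_1:          max_j sum_i |a_ij|      largest column sum
       NORM_INFINITY:   max_i sum_j |a_ij|      largest row sum

   the matrix two-norm (the largest singular value) is not computed here.
*/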
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatTranspose_MPIAIJ"
1361 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1362 {
1363   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1364   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1365   int          ierr;
1366   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1367   Mat          B;
1368   PetscScalar  *array;
1369 
1370   PetscFunctionBegin;
1371   if (!matout && M != N) {
1372     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1373   }
1374 
1375   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1376 
1377   /* copy over the A part */
1378   Aloc = (Mat_SeqAIJ*)a->A->data;
1379   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1380   row = a->rstart;
1381   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1382   for (i=0; i<m; i++) {
1383     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1384     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1385   }
1386   aj = Aloc->j;
1387   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1388 
1389   /* copy over the B part */
1390   Aloc = (Mat_SeqAIJ*)a->B->data;
1391   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1392   row  = a->rstart;
1393   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1394   ct   = cols;
1395   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1396   for (i=0; i<m; i++) {
1397     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1398     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1399   }
1400   ierr = PetscFree(ct);CHKERRQ(ierr);
1401   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1402   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1403   if (matout) {
1404     *matout = B;
1405   } else {
1406     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1413 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1414 {
1415   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1416   Mat        a = aij->A,b = aij->B;
1417   int        ierr,s1,s2,s3;
1418 
1419   PetscFunctionBegin;
1420   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1421   if (rr) {
1422     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1423     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1424     /* Overlap communication with computation. */
1425     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1426   }
1427   if (ll) {
1428     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1429     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1430     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1431   }
1432   /* scale  the diagonal block */
1433   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1434 
1435   if (rr) {
1436     /* Do a scatter end and then right scale the off-diagonal block */
1437     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1438     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1439   }
1440 
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1447 int MatPrintHelp_MPIAIJ(Mat A)
1448 {
1449   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1450   int        ierr;
1451 
1452   PetscFunctionBegin;
1453   if (!a->rank) {
1454     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1461 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1462 {
1463   PetscFunctionBegin;
1464   *bs = 1;
1465   PetscFunctionReturn(0);
1466 }
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1469 int MatSetUnfactored_MPIAIJ(Mat A)
1470 {
1471   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1472   int        ierr;
1473 
1474   PetscFunctionBegin;
1475   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatEqual_MPIAIJ"
1481 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1482 {
1483   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1484   Mat        a,b,c,d;
1485   PetscTruth flg;
1486   int        ierr;
1487 
1488   PetscFunctionBegin;
1489   a = matA->A; b = matA->B;
1490   c = matB->A; d = matB->B;
1491 
1492   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1493   if (flg == PETSC_TRUE) {
1494     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1495   }
1496   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatCopy_MPIAIJ"
1502 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1503 {
1504   int        ierr;
1505   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1506   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1507 
1508   PetscFunctionBegin;
1509   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1510   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1511     /* because of the column compression in the off-processor part of the matrix a->B,
1512        the number of columns in a->B and b->B may be different, hence we cannot call
1513        the MatCopy() directly on the two parts. If need be, we can provide a more
1514        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1515        then copying the submatrices */
1516     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1517   } else {
1518     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1519     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1520   }
1521   PetscFunctionReturn(0);
1522 }
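/*
   Usage sketch (assumed caller-side code, not part of this file): with matching nonzero
   patterns MatCopy() takes the fast per-block path above; any other structure flag falls
   back to MatCopy_Basic(). A, B and C below stand for already-assembled matrices of the
   same sizes, with B sharing A's nonzero pattern.

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatCopy(A,C,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/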
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1526 int MatSetUpPreallocation_MPIAIJ(Mat A)
1527 {
1528   int        ierr;
1529 
1530   PetscFunctionBegin;
1531   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1536 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1537 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1538 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
1539 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1540 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1541 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1542 #endif
1543 
1544 #include "petscblaslapack.h"
1545 extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure);
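/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y. With SAME_NONZERO_PATTERN the values of each
   block are combined directly with a BLAS axpy; with SUBSET_NONZERO_PATTERN the diagonal
   blocks are handled by MatAXPY_SeqAIJ() and a mapping (xtoy) from X's off-diagonal
   nonzeros into Y's is built once, cached on Y, and reused; otherwise the generic
   MatAXPY_Basic() is used.
*/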
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatAXPY_MPIAIJ"
1548 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1549 {
1550   int        ierr,one=1,i;
1551   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1552   Mat_SeqAIJ *x,*y;
1553 
1554   PetscFunctionBegin;
1555   if (str == SAME_NONZERO_PATTERN) {
1556     x = (Mat_SeqAIJ *)xx->A->data;
1557     y = (Mat_SeqAIJ *)yy->A->data;
1558     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1559     x = (Mat_SeqAIJ *)xx->B->data;
1560     y = (Mat_SeqAIJ *)yy->B->data;
1561     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1562   } else if (str == SUBSET_NONZERO_PATTERN) {
1563     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1564 
1565     x = (Mat_SeqAIJ *)xx->B->data;
1566     y = (Mat_SeqAIJ *)yy->B->data;
1567     if (y->xtoy && y->XtoY != xx->B) {
1568       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1569       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1570     }
1571     if (!y->xtoy) { /* get xtoy */
1572       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1573       y->XtoY = xx->B;
1574     }
1575     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1576   } else {
1577     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1578   }
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 /* -------------------------------------------------------------------*/
1583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1584        MatGetRow_MPIAIJ,
1585        MatRestoreRow_MPIAIJ,
1586        MatMult_MPIAIJ,
1587        MatMultAdd_MPIAIJ,
1588        MatMultTranspose_MPIAIJ,
1589        MatMultTransposeAdd_MPIAIJ,
1590        0,
1591        0,
1592        0,
1593        0,
1594        0,
1595        0,
1596        MatRelax_MPIAIJ,
1597        MatTranspose_MPIAIJ,
1598        MatGetInfo_MPIAIJ,
1599        MatEqual_MPIAIJ,
1600        MatGetDiagonal_MPIAIJ,
1601        MatDiagonalScale_MPIAIJ,
1602        MatNorm_MPIAIJ,
1603        MatAssemblyBegin_MPIAIJ,
1604        MatAssemblyEnd_MPIAIJ,
1605        0,
1606        MatSetOption_MPIAIJ,
1607        MatZeroEntries_MPIAIJ,
1608        MatZeroRows_MPIAIJ,
1609 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1610        MatLUFactorSymbolic_MPIAIJ_TFS,
1611 #else
1612        0,
1613 #endif
1614        0,
1615        0,
1616        0,
1617        MatSetUpPreallocation_MPIAIJ,
1618        0,
1619        0,
1620        0,
1621        0,
1622        MatDuplicate_MPIAIJ,
1623        0,
1624        0,
1625        0,
1626        0,
1627        MatAXPY_MPIAIJ,
1628        MatGetSubMatrices_MPIAIJ,
1629        MatIncreaseOverlap_MPIAIJ,
1630        MatGetValues_MPIAIJ,
1631        MatCopy_MPIAIJ,
1632        MatPrintHelp_MPIAIJ,
1633        MatScale_MPIAIJ,
1634        0,
1635        0,
1636        0,
1637        MatGetBlockSize_MPIAIJ,
1638        0,
1639        0,
1640        0,
1641        0,
1642        MatFDColoringCreate_MPIAIJ,
1643        0,
1644        MatSetUnfactored_MPIAIJ,
1645        0,
1646        0,
1647        MatGetSubMatrix_MPIAIJ,
1648        MatDestroy_MPIAIJ,
1649        MatView_MPIAIJ,
1650        MatGetPetscMaps_Petsc,
1651        0,
1652        0,
1653        0,
1654        0,
1655        0,
1656        0,
1657        0,
1658        0,
1659        MatSetColoring_MPIAIJ,
1660        MatSetValuesAdic_MPIAIJ,
1661        MatSetValuesAdifor_MPIAIJ
1662 };
1663 
1664 /* ----------------------------------------------------------------------------------------*/
1665 
1666 EXTERN_C_BEGIN
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1669 int MatStoreValues_MPIAIJ(Mat mat)
1670 {
1671   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1672   int        ierr;
1673 
1674   PetscFunctionBegin;
1675   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1676   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 EXTERN_C_END
1680 
1681 EXTERN_C_BEGIN
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1684 int MatRetrieveValues_MPIAIJ(Mat mat)
1685 {
1686   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1687   int        ierr;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1691   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1692   PetscFunctionReturn(0);
1693 }
1694 EXTERN_C_END
1695 
1696 #include "petscpc.h"
1697 EXTERN_C_BEGIN
1698 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1699 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec);
1700 EXTERN_C_END
1701 
1702 EXTERN_C_BEGIN
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1705 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1706 {
1707   Mat_MPIAIJ   *b;
1708   int          ierr,i;
1709 
1710   PetscFunctionBegin;
1711   B->preallocated = PETSC_TRUE;
1712   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1713   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1714   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1715   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1716   if (d_nnz) {
1717     for (i=0; i<B->m; i++) {
1718       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1719     }
1720   }
1721   if (o_nnz) {
1722     for (i=0; i<B->m; i++) {
1723       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1724     }
1725   }
1726   b = (Mat_MPIAIJ*)B->data;
1727 
1728   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1729   PetscLogObjectParent(B,b->A);
1730   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1731   PetscLogObjectParent(B,b->B);
1732 
1733   PetscFunctionReturn(0);
1734 }
1735 EXTERN_C_END
1736 
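/*
   MatCreate_MPIAIJ - Constructor registered for the MATMPIAIJ type. It splits the global
   row and column sizes across the communicator, gathers every process's local sizes with
   MPI_Allgather and turns them into prefix sums (rowners/cowners) recording each process's
   ownership range, creates the stash used to hold off-process MatSetValues() entries, and
   composes the type-specific functions (preallocation, store/retrieve values, ...).
*/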
1737 EXTERN_C_BEGIN
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatCreate_MPIAIJ"
1740 int MatCreate_MPIAIJ(Mat B)
1741 {
1742   Mat_MPIAIJ *b;
1743   int        ierr,i,size;
1744 
1745   PetscFunctionBegin;
1746   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1747 
1748   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1749   B->data         = (void*)b;
1750   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1751   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1752   B->factor       = 0;
1753   B->assembled    = PETSC_FALSE;
1754   B->mapping      = 0;
1755 
1756   B->insertmode      = NOT_SET_VALUES;
1757   b->size            = size;
1758   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1759 
1760   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1761   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1762 
1763   /* the information in the maps duplicates the information computed below, eventually
1764      we should remove the duplicate information that is not contained in the maps */
1765   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1766   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1767 
1768   /* build local table of row and column ownerships */
1769   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1770   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1771   b->cowners = b->rowners + b->size + 2;
1772   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1773   b->rowners[0] = 0;
1774   for (i=2; i<=b->size; i++) {
1775     b->rowners[i] += b->rowners[i-1];
1776   }
1777   b->rstart = b->rowners[b->rank];
1778   b->rend   = b->rowners[b->rank+1];
1779   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1780   b->cowners[0] = 0;
1781   for (i=2; i<=b->size; i++) {
1782     b->cowners[i] += b->cowners[i-1];
1783   }
1784   b->cstart = b->cowners[b->rank];
1785   b->cend   = b->cowners[b->rank+1];
1786 
1787   /* build cache for off array entries formed */
1788   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1789   b->donotstash  = PETSC_FALSE;
1790   b->colmap      = 0;
1791   b->garray      = 0;
1792   b->roworiented = PETSC_TRUE;
1793 
1794   /* stuff used for matrix vector multiply */
1795   b->lvec      = PETSC_NULL;
1796   b->Mvctx     = PETSC_NULL;
1797 
1798   /* stuff for MatGetRow() */
1799   b->rowindices   = 0;
1800   b->rowvalues    = 0;
1801   b->getrowactive = PETSC_FALSE;
1802 
1803   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1804                                      "MatStoreValues_MPIAIJ",
1805                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1807                                      "MatRetrieveValues_MPIAIJ",
1808                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1810 				     "MatGetDiagonalBlock_MPIAIJ",
1811                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1813 				     "MatIsSymmetric_MPIAIJ",
1814 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1816 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1817 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1819 				     "MatDiagonalScaleLocal_MPIAIJ",
1820 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 EXTERN_C_END
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1827 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1828 {
1829   Mat        mat;
1830   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1831   int        ierr;
1832 
1833   PetscFunctionBegin;
1834   *newmat       = 0;
1835   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1836   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1837   a    = (Mat_MPIAIJ*)mat->data;
1838   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1839   mat->factor       = matin->factor;
1840   mat->assembled    = PETSC_TRUE;
1841   mat->insertmode   = NOT_SET_VALUES;
1842   mat->preallocated = PETSC_TRUE;
1843 
1844   a->rstart       = oldmat->rstart;
1845   a->rend         = oldmat->rend;
1846   a->cstart       = oldmat->cstart;
1847   a->cend         = oldmat->cend;
1848   a->size         = oldmat->size;
1849   a->rank         = oldmat->rank;
1850   a->donotstash   = oldmat->donotstash;
1851   a->roworiented  = oldmat->roworiented;
1852   a->rowindices   = 0;
1853   a->rowvalues    = 0;
1854   a->getrowactive = PETSC_FALSE;
1855 
1856   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1857   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1858   if (oldmat->colmap) {
1859 #if defined (PETSC_USE_CTABLE)
1860     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1861 #else
1862     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1863     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1864     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1865 #endif
1866   } else a->colmap = 0;
1867   if (oldmat->garray) {
1868     int len;
1869     len  = oldmat->B->n;
1870     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1871     PetscLogObjectMemory(mat,len*sizeof(int));
1872     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1873   } else a->garray = 0;
1874 
1875   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1876   PetscLogObjectParent(mat,a->lvec);
1877   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1878   PetscLogObjectParent(mat,a->Mvctx);
1879   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1880   PetscLogObjectParent(mat,a->A);
1881   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1882   PetscLogObjectParent(mat,a->B);
1883   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1884   *newmat = mat;
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #include "petscsys.h"
1889 
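/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format into a parallel AIJ matrix.
   Process 0 reads the header and the row lengths, distributes the row lengths with
   MPI_Scatterv, then reads the column indices and numerical values one process at a time
   and ships them with MPI_Send; the receivers pick them up with MPI_Recv. Each process
   counts its diagonal and off-diagonal entries so the matrix can be preallocated exactly
   before the rows are inserted with MatSetValues().
*/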
1890 EXTERN_C_BEGIN
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatLoad_MPIAIJ"
1893 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1894 {
1895   Mat          A;
1896   PetscScalar  *vals,*svals;
1897   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1898   MPI_Status   status;
1899   int          i,nz,ierr,j,rstart,rend,fd;
1900   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1901   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1902   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1903 #if defined(PETSC_HAVE_MUMPS)
1904   PetscTruth   flag;
1905 #endif
1906 
1907   PetscFunctionBegin;
1908   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1909   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1910   if (!rank) {
1911     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1912     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1913     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1914     if (header[3] < 0) {
1915       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1916     }
1917   }
1918 
1919   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1920   M = header[1]; N = header[2];
1921   /* determine ownership of all rows */
1922   m = M/size + ((M % size) > rank);
1923   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1924   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1925   rowners[0] = 0;
1926   for (i=2; i<=size; i++) {
1927     rowners[i] += rowners[i-1];
1928   }
1929   rstart = rowners[rank];
1930   rend   = rowners[rank+1];
1931 
1932   /* distribute row lengths to all processors */
1933   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1934   offlens = ourlens + (rend-rstart);
1935   if (!rank) {
1936     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1937     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1938     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1939     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1940     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1941     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1942   } else {
1943     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1944   }
1945 
1946   if (!rank) {
1947     /* calculate the number of nonzeros on each processor */
1948     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1949     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1950     for (i=0; i<size; i++) {
1951       for (j=rowners[i]; j< rowners[i+1]; j++) {
1952         procsnz[i] += rowlengths[j];
1953       }
1954     }
1955     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1956 
1957     /* determine max buffer needed and allocate it */
1958     maxnz = 0;
1959     for (i=0; i<size; i++) {
1960       maxnz = PetscMax(maxnz,procsnz[i]);
1961     }
1962     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1963 
1964     /* read in my part of the matrix column indices  */
1965     nz   = procsnz[0];
1966     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1967     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1968 
1969     /* read in everyone else's part and ship it off */
1970     for (i=1; i<size; i++) {
1971       nz   = procsnz[i];
1972       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1973       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1974     }
1975     ierr = PetscFree(cols);CHKERRQ(ierr);
1976   } else {
1977     /* determine buffer space needed for message */
1978     nz = 0;
1979     for (i=0; i<m; i++) {
1980       nz += ourlens[i];
1981     }
1982     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1983 
1984     /* receive message of column indices*/
1985     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1986     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1987     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of column indices received does not match row lengths in file");
1988   }
1989 
1990   /* determine column ownership if matrix is not square */
1991   if (N != M) {
1992     n      = N/size + ((N % size) > rank);
1993     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1994     cstart = cend - n;
1995   } else {
1996     cstart = rstart;
1997     cend   = rend;
1998     n      = cend - cstart;
1999   }
2000 
2001   /* loop over local rows, determining number of off diagonal entries */
2002   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2003   jj = 0;
2004   for (i=0; i<m; i++) {
2005     for (j=0; j<ourlens[i]; j++) {
2006       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2007       jj++;
2008     }
2009   }
2010 
2011   /* create our matrix */
2012   for (i=0; i<m; i++) {
2013     ourlens[i] -= offlens[i];
2014   }
2015   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2016   ierr = MatSetType(A,type);CHKERRQ(ierr);
2017   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2018 
2019   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2020   for (i=0; i<m; i++) {
2021     ourlens[i] += offlens[i];
2022   }
2023 
2024   if (!rank) {
2025     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2026 
2027     /* read in my part of the matrix numerical values  */
2028     nz   = procsnz[0];
2029     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2030 
2031     /* insert into matrix */
2032     jj      = rstart;
2033     smycols = mycols;
2034     svals   = vals;
2035     for (i=0; i<m; i++) {
2036       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2037       smycols += ourlens[i];
2038       svals   += ourlens[i];
2039       jj++;
2040     }
2041 
2042     /* read in other processors and ship out */
2043     for (i=1; i<size; i++) {
2044       nz   = procsnz[i];
2045       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2046       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2047     }
2048     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2049   } else {
2050     /* receive numeric values */
2051     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2052 
2053     /* receive message of values*/
2054     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2055     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2056     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of values received does not match row lengths in file");
2057 
2058     /* insert into matrix */
2059     jj      = rstart;
2060     smycols = mycols;
2061     svals   = vals;
2062     for (i=0; i<m; i++) {
2063       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2064       smycols += ourlens[i];
2065       svals   += ourlens[i];
2066       jj++;
2067     }
2068   }
2069   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2070   ierr = PetscFree(vals);CHKERRQ(ierr);
2071   ierr = PetscFree(mycols);CHKERRQ(ierr);
2072   ierr = PetscFree(rowners);CHKERRQ(ierr);
2073 
2074   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2075   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076 #if defined(PETSC_HAVE_MUMPS)
2077   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
2078   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
2079 #endif
2080   *newmat = A;
2081   PetscFunctionReturn(0);
2082 }
2083 EXTERN_C_END
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2087 /*
2088     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2089   copy, and then the end result obtained by concatenating the local matrices.
2090   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2091 */
2092 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2093 {
2094   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2095   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2096   Mat          *local,M,Mreuse;
2097   PetscScalar  *vwork,*aa;
2098   MPI_Comm     comm = mat->comm;
2099   Mat_SeqAIJ   *aij;
2100 
2101 
2102   PetscFunctionBegin;
2103   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2105 
2106   if (call ==  MAT_REUSE_MATRIX) {
2107     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2108     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2109     local = &Mreuse;
2110     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2111   } else {
2112     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2113     Mreuse = *local;
2114     ierr   = PetscFree(local);CHKERRQ(ierr);
2115   }
2116 
2117   /*
2118       m - number of local rows
2119       n - number of columns (same on all processors)
2120       rstart - first row in new global matrix generated
2121   */
2122   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2123   if (call == MAT_INITIAL_MATRIX) {
2124     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2125     ii  = aij->i;
2126     jj  = aij->j;
2127 
2128     /*
2129         Determine the number of non-zeros in the diagonal and off-diagonal
2130         portions of the matrix in order to do correct preallocation
2131     */
2132 
2133     /* first get start and end of "diagonal" columns */
2134     if (csize == PETSC_DECIDE) {
2135       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2136       if (mglobal == n) { /* square matrix */
2137 	nlocal = m;
2138       } else {
2139         nlocal = n/size + ((n % size) > rank);
2140       }
2141     } else {
2142       nlocal = csize;
2143     }
2144     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2145     rstart = rend - nlocal;
2146     if (rank == size - 1 && rend != n) {
2147       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2148     }
2149 
2150     /* next, compute all the lengths */
2151     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2152     olens = dlens + m;
2153     for (i=0; i<m; i++) {
2154       jend = ii[i+1] - ii[i];
2155       olen = 0;
2156       dlen = 0;
2157       for (j=0; j<jend; j++) {
2158         if (*jj < rstart || *jj >= rend) olen++;
2159         else dlen++;
2160         jj++;
2161       }
2162       olens[i] = olen;
2163       dlens[i] = dlen;
2164     }
2165     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2166     ierr = PetscFree(dlens);CHKERRQ(ierr);
2167   } else {
2168     int ml,nl;
2169 
2170     M = *newmat;
2171     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2172     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2173     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2174     /*
2175          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2176        rather than the slower MatSetValues().
2177     */
2178     M->was_assembled = PETSC_TRUE;
2179     M->assembled     = PETSC_FALSE;
2180   }
2181   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2182   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2183   ii  = aij->i;
2184   jj  = aij->j;
2185   aa  = aij->a;
2186   for (i=0; i<m; i++) {
2187     row   = rstart + i;
2188     nz    = ii[i+1] - ii[i];
2189     cwork = jj;     jj += nz;
2190     vwork = aa;     aa += nz;
2191     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2192   }
2193 
2194   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2195   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2196   *newmat = M;
2197 
2198   /* save the submatrix used on this processor for the next request */
2199   if (call ==  MAT_INITIAL_MATRIX) {
2200     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2201     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2202   }
2203 
2204   PetscFunctionReturn(0);
2205 }
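/*
   Usage sketch (assumed caller-side code, not part of this file): extract a parallel
   submatrix once with MAT_INITIAL_MATRIX, then refresh its values with MAT_REUSE_MATRIX,
   which picks up the "SubMatrix" object composed above. isrow and iscol are index sets
   owned by the caller.

     Mat sub;
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... change the values (not the nonzero locations) in mat ...
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(sub);CHKERRQ(ierr);
*/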
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2209 /*@C
2210    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2211    (the default parallel PETSc format).  For good matrix assembly performance
2212    the user should preallocate the matrix storage by setting the parameters
2213    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2214    performance can be increased by more than a factor of 50.
2215 
2216    Collective on MPI_Comm
2217 
2218    Input Parameters:
2219 +  A - the matrix
2220 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2221            (same value is used for all local rows)
2222 .  d_nnz - array containing the number of nonzeros in the various rows of the
2223            DIAGONAL portion of the local submatrix (possibly different for each row)
2224            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2225            The size of this array is equal to the number of local rows, i.e 'm'.
2226            You must leave room for the diagonal entry even if it is zero.
2227 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2228            submatrix (same value is used for all local rows).
2229 -  o_nnz - array containing the number of nonzeros in the various rows of the
2230            OFF-DIAGONAL portion of the local submatrix (possibly different for
2231            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2232            structure. The size of this array is equal to the number
2233            of local rows, i.e 'm'.
2234 
2235    The AIJ format (also called the Yale sparse matrix format or
2236    compressed row storage), is fully compatible with standard Fortran 77
2237    storage.  That is, the stored row and column indices can begin at
2238    either one (as in Fortran) or zero.  See the users manual for details.
2239 
2240    The user MUST specify either the local or global matrix dimensions
2241    (possibly both).
2242 
2243    The parallel matrix is partitioned such that the first m0 rows belong to
2244    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2245    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
2246 
2247    The DIAGONAL portion of the local submatrix of a processor can be defined
2248    as the submatrix which is obtained by extracting the part corresponding
2249    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2250    first row that belongs to the processor, and r2 is the last row belonging
2251    to this processor. This is a square mxm matrix. The remaining portion
2252    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2253 
2254    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2255 
2256    By default, this format uses inodes (identical nodes) when possible.
2257    We search for consecutive rows with the same nonzero structure, thereby
2258    reusing matrix information to achieve increased efficiency.
2259 
2260    Options Database Keys:
2261 +  -mat_aij_no_inode  - Do not use inodes
2262 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2263 -  -mat_aij_oneindex - Internally use indexing starting at 1
2264         rather than 0.  Note that when calling MatSetValues(),
2265         the user still MUST index entries starting at 0!
2266 
2267    Example usage:
2268 
2269    Consider the following 8x8 matrix with 34 non-zero values, that is
2270    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2271    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2272    as follows:
2273 
2274 .vb
2275             1  2  0  |  0  3  0  |  0  4
2276     Proc0   0  5  6  |  7  0  0  |  8  0
2277             9  0 10  | 11  0  0  | 12  0
2278     -------------------------------------
2279            13  0 14  | 15 16 17  |  0  0
2280     Proc1   0 18  0  | 19 20 21  |  0  0
2281             0  0  0  | 22 23  0  | 24  0
2282     -------------------------------------
2283     Proc2  25 26 27  |  0  0 28  | 29  0
2284            30  0  0  | 31 32 33  |  0 34
2285 .ve
2286 
2287    This can be represented as a collection of submatrices as:
2288 
2289 .vb
2290       A B C
2291       D E F
2292       G H I
2293 .ve
2294 
2295    Where the submatrices A,B,C are owned by proc0, D,E,F are
2296    owned by proc1, G,H,I are owned by proc2.
2297 
2298    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2299    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2300    The 'M','N' parameters are 8,8, and have the same values on all procs.
2301 
2302    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2303    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2304    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2305    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2306    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2307    matrix, and [DF] as another SeqAIJ matrix.
2308 
2309    When d_nz, o_nz parameters are specified, d_nz storage elements are
2310    allocated for every row of the local diagonal submatrix, and o_nz
2311    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2312    One way to choose d_nz and o_nz is to use the max nonzeros per local
2313    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2314    In this case, the values of d_nz,o_nz are:
2315 .vb
2316      proc0 : dnz = 2, o_nz = 2
2317      proc1 : dnz = 3, o_nz = 2
2318      proc2 : dnz = 1, o_nz = 4
2319 .ve
2320    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2321    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2322    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2323    34 values.
2324 
2325    When d_nnz, o_nnz parameters are specified, the storage is specified
2326    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2327    In the above case the values for d_nnz,o_nnz are:
2328 .vb
2329      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2330      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2331      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2332 .ve
2333    Here the space allocated is the sum of all the above values, i.e. 34, and
2334    hence pre-allocation is perfect.
2335 
2336    Level: intermediate
2337 
2338 .keywords: matrix, aij, compressed row, sparse, parallel
2339 
2340 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2341 @*/
2342 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2343 {
2344   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2345 
2346   PetscFunctionBegin;
2347   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2348   if (f) {
2349     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2350   }
2351   PetscFunctionReturn(0);
2352 }
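/*
   Usage sketch (assumed application code), matching the 8x8 example in the manual page
   above: on proc1, which owns rows 3-5, exact per-row preallocation is supplied before
   any values are inserted. The communicator name comm is a placeholder.

     Mat A;
     int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1},ierr;
     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/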
2353 
2354 #undef __FUNCT__
2355 #define __FUNCT__ "MatCreateMPIAIJ"
2356 /*@C
2357    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2358    (the default parallel PETSc format).  For good matrix assembly performance
2359    the user should preallocate the matrix storage by setting the parameters
2360    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2361    performance can be increased by more than a factor of 50.
2362 
2363    Collective on MPI_Comm
2364 
2365    Input Parameters:
2366 +  comm - MPI communicator
2367 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2368            This value should be the same as the local size used in creating the
2369            y vector for the matrix-vector product y = Ax.
2370 .  n - This value should be the same as the local size used in creating the
2371        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2372        calculated if N is given) For square matrices n is almost always m.
2373 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2374 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2375 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2376            (same value is used for all local rows)
2377 .  d_nnz - array containing the number of nonzeros in the various rows of the
2378            DIAGONAL portion of the local submatrix (possibly different for each row)
2379            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2380            The size of this array is equal to the number of local rows, i.e 'm'.
2381            You must leave room for the diagonal entry even if it is zero.
2382 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2383            submatrix (same value is used for all local rows).
2384 -  o_nnz - array containing the number of nonzeros in the various rows of the
2385            OFF-DIAGONAL portion of the local submatrix (possibly different for
2386            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2387            structure. The size of this array is equal to the number
2388            of local rows, i.e 'm'.
2389 
2390    Output Parameter:
2391 .  A - the matrix
2392 
2393    Notes:
2394    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2395    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2396    storage requirements for this matrix.
2397 
2398    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2399    processor then it must be used on all processors that share the object for
2400    that argument.
2401 
2402    The AIJ format (also called the Yale sparse matrix format or
2403    compressed row storage), is fully compatible with standard Fortran 77
2404    storage.  That is, the stored row and column indices can begin at
2405    either one (as in Fortran) or zero.  See the users manual for details.
2406 
2407    The user MUST specify either the local or global matrix dimensions
2408    (possibly both).
2409 
2410    The parallel matrix is partitioned such that the first m0 rows belong to
2411    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2412    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
2413 
2414    The DIAGONAL portion of the local submatrix of a processor can be defined
2415    as the submatrix which is obtained by extracting the part corresponding
2416    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2417    first row that belongs to the processor, and r2 is the last row belonging
2418    to this processor. This is a square mxm matrix. The remaining portion
2419    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2420 
2421    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2422 
2423    When calling this routine with a single process communicator, a matrix of
2424    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2425    type of communicator, use the construction mechanism:
2426      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2427 
2428    By default, this format uses inodes (identical nodes) when possible.
2429    We search for consecutive rows with the same nonzero structure, thereby
2430    reusing matrix information to achieve increased efficiency.
2431 
2432    Options Database Keys:
2433 +  -mat_aij_no_inode  - Do not use inodes
2434 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2435 -  -mat_aij_oneindex - Internally use indexing starting at 1
2436         rather than 0.  Note that when calling MatSetValues(),
2437         the user still MUST index entries starting at 0!
2438 
2439 
2440    Example usage:
2441 
2442    Consider the following 8x8 matrix with 34 non-zero values, that is
2443    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2444    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2445    as follows:
2446 
2447 .vb
2448             1  2  0  |  0  3  0  |  0  4
2449     Proc0   0  5  6  |  7  0  0  |  8  0
2450             9  0 10  | 11  0  0  | 12  0
2451     -------------------------------------
2452            13  0 14  | 15 16 17  |  0  0
2453     Proc1   0 18  0  | 19 20 21  |  0  0
2454             0  0  0  | 22 23  0  | 24  0
2455     -------------------------------------
2456     Proc2  25 26 27  |  0  0 28  | 29  0
2457            30  0  0  | 31 32 33  |  0 34
2458 .ve
2459 
2460    This can be represented as a collection of submatrices as:
2461 
2462 .vb
2463       A B C
2464       D E F
2465       G H I
2466 .ve
2467 
2468    Where the submatrices A,B,C are owned by proc0, D,E,F are
2469    owned by proc1, G,H,I are owned by proc2.
2470 
2471    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2472    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2473    The 'M','N' parameters are 8,8, and have the same values on all procs.
2474 
2475    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2476    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2477    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2478    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2479    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2480    matrix, and [DF] as another SeqAIJ matrix.
2481 
2482    When d_nz, o_nz parameters are specified, d_nz storage elements are
2483    allocated for every row of the local diagonal submatrix, and o_nz
2484    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2485    One way to choose d_nz and o_nz is to use the max nonzeros per local
2486    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2487    In this case, the values of d_nz,o_nz are:
2488 .vb
2489      proc0 : dnz = 2, o_nz = 2
2490      proc1 : dnz = 3, o_nz = 2
2491      proc2 : dnz = 1, o_nz = 4
2492 .ve
2493    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2494    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2495    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2496    34 values.
2497 
2498    When d_nnz, o_nnz parameters are specified, the storage is specified
2499    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2500    In the above case the values for d_nnz,o_nnz are:
2501 .vb
2502      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2503      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2504      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2505 .ve
2506    Here the space allocated is the sum of all the above values, i.e. 34, and
2507    hence pre-allocation is perfect.
2508 
2509    Level: intermediate
2510 
2511 .keywords: matrix, aij, compressed row, sparse, parallel
2512 
2513 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2514 @*/
2515 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2516 {
2517   int ierr,size;
2518 
2519   PetscFunctionBegin;
2520   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2521   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2522   if (size > 1) {
2523     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2524     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2525   } else {
2526     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2527     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2528   }
2529   PetscFunctionReturn(0);
2530 }
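/*
   Usage sketch (assumed application code): the convenience routine above combines
   MatCreate(), MatSetType() and the preallocation call. Letting PETSc pick the local
   sizes from the global 8x8 dimensions, with a rough estimate of 3 diagonal-block and
   2 off-diagonal-block nonzeros per row:

     Mat A;
     int ierr;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/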
2531 
2532 #undef __FUNCT__
2533 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2534 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2535 {
2536   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2537   PetscFunctionBegin;
2538   *Ad     = a->A;
2539   *Ao     = a->B;
2540   *colmap = a->garray;
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 #undef __FUNCT__
2545 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2546 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2547 {
2548   int        ierr,i;
2549   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2550 
2551   PetscFunctionBegin;
2552   if (coloring->ctype == IS_COLORING_LOCAL) {
2553     ISColoringValue *allcolors,*colors;
2554     ISColoring      ocoloring;
2555 
2556     /* set coloring for diagonal portion */
2557     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2558 
2559     /* set coloring for off-diagonal portion */
2560     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2561     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2562     for (i=0; i<a->B->n; i++) {
2563       colors[i] = allcolors[a->garray[i]];
2564     }
2565     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2566     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2567     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2568     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2569   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2570     ISColoringValue *colors;
2571     int             *larray;
2572     ISColoring      ocoloring;
2573 
2574     /* set coloring for diagonal portion */
2575     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2576     for (i=0; i<a->A->n; i++) {
2577       larray[i] = i + a->cstart;
2578     }
2579     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2580     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2581     for (i=0; i<a->A->n; i++) {
2582       colors[i] = coloring->colors[larray[i]];
2583     }
2584     ierr = PetscFree(larray);CHKERRQ(ierr);
2585     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2586     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2587     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2588 
2589     /* set coloring for off-diagonal portion */
2590     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2591     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2592     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2593     for (i=0; i<a->B->n; i++) {
2594       colors[i] = coloring->colors[larray[i]];
2595     }
2596     ierr = PetscFree(larray);CHKERRQ(ierr);
2597     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2598     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2599     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2600   } else {
2601     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2602   }
2603 
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 #undef __FUNCT__
2608 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2609 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2610 {
2611   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2612   int        ierr;
2613 
2614   PetscFunctionBegin;
2615   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2616   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 #undef __FUNCT__
2621 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2622 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2623 {
2624   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2625   int        ierr;
2626 
2627   PetscFunctionBegin;
2628   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2629   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 #undef __FUNCT__
2634 #define __FUNCT__ "MatMerge"
2635 /*@C
2636       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2637                  matrices from each processor
2638 
2639     Collective on MPI_Comm
2640 
2641    Input Parameters:
2642 +    comm - the communicator the parallel matrix will live on
2643 -    inmat - the input sequential matrix (one per process)
2644 
2645    Output Parameter:
2646 .    outmat - the parallel matrix generated
2647 
2648     Level: advanced
2649 
2650    Notes: The number of columns of the matrix on EACH of the separate processes
2651       MUST be the same. The input matrix is destroyed by this routine.
2652 
2653 @*/
2654 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2655 {
2656   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2657   PetscScalar *values;
2658   PetscMap    columnmap,rowmap;
2659 
2660   PetscFunctionBegin;
2661 
2662   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2663 
2664   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2665   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2666   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2667   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2668   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2669   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2670 
2671   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2672   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2673   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2674   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2675   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2676 
2677   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2678   for (i=0;i<m;i++) {
2679     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2680     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2681     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2682   }
2683   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2684   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2685 
2686   for (i=0;i<m;i++) {
2687     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2688     I    = i + rstart;
2689     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2690     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2691   }
2692   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2693   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2694   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2695 
2696   PetscFunctionReturn(0);
2697 }
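/*
   Usage sketch (assumed caller-side code): each process builds and assembles its own
   sequential slice; MatMerge() then stacks the slices row-wise into a single parallel
   matrix. mlocal and N are placeholders for the local row count and the (common) global
   column count. Note that MatMerge() destroys the input matrix.

     Mat local,global;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&local);CHKERRQ(ierr);
     ... insert values with MatSetValues() and assemble local ...
     ierr = MatMerge(PETSC_COMM_WORLD,local,&global);CHKERRQ(ierr);
*/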
2698 
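/*
   MatFileSplit - Writes each process's rows of a parallel matrix to its own binary file,
   named <outfile>.<rank>, by copying the local rows into a sequential AIJ matrix and
   viewing it with a PETSC_COMM_SELF binary viewer.
*/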
2699 #undef __FUNCT__
2700 #define __FUNCT__ "MatFileSplit"
2701 int MatFileSplit(Mat A,char *outfile)
2702 {
2703   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2704   PetscViewer out;
2705   char        *name;
2706   Mat         B;
2707   PetscScalar *values;
2708 
2709   PetscFunctionBegin;
2710 
2711   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2712   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2713   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2714   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2715   for (i=0;i<m;i++) {
2716     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2717     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2718     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2719   }
2720   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2721   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2722 
2723   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2724   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2725   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2726   sprintf(name,"%s.%d",outfile,rank);
2727   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2728   ierr = PetscFree(name);CHKERRQ(ierr);
2729   ierr = MatView(B,out);CHKERRQ(ierr);
2730   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2731   ierr = MatDestroy(B);CHKERRQ(ierr);
2732   PetscFunctionReturn(0);
2733 }
2734