xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0bad918300f840a56f43416dc3932aa97bd6fa72)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it it is not scalable (each processor
19 has an order N integer array but is fast to acess.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some Variables required in the macro */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift=0;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
289     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
294         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
353   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 
359   PetscFunctionBegin;
360   if (!aij->donotstash) {
361     while (1) {
362       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
363       if (!flg) break;
364 
365       for (i=0; i<n;) {
366         /* Now identify the consecutive vals belonging to the same row */
367         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
368         if (j < n) ncols = j-i;
369         else       ncols = n-i;
370         /* Now assemble all these values with a single function call */
371         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
372         i = j;
373       }
374     }
375     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
376   }
377 
378   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
379   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
380 
381   /* determine if any processor has disassembled, if so we must
382      also disassemble ourselfs, in order that we may reassemble. */
383   /*
384      if nonzero structure of submatrix B cannot change then we know that
385      no processor disassembled thus we can skip this stuff
386   */
387   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
388     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
389     if (mat->was_assembled && !other_disassembled) {
390       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
391     }
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
399 
400   if (aij->rowvalues) {
401     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
402     aij->rowvalues = 0;
403   }
404 
405   /* used by MatAXPY() */
406   a->xtoy = 0; b->xtoy = 0;
407   a->XtoY = 0; b->XtoY = 0;
408 
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
414 int MatZeroEntries_MPIAIJ(Mat A)
415 {
416   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
417   int        ierr;
418 
419   PetscFunctionBegin;
420   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
421   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatZeroRows_MPIAIJ"
427 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
428 {
429   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
430   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
431   int            *nprocs,j,idx,nsends,row;
432   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
433   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
434   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
435   MPI_Comm       comm = A->comm;
436   MPI_Request    *send_waits,*recv_waits;
437   MPI_Status     recv_status,*send_status;
438   IS             istmp;
439   PetscTruth     found;
440 
441   PetscFunctionBegin;
442   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
443   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
444 
445   /*  first count number of contributors to each processor */
446   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
447   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
448   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
449   for (i=0; i<N; i++) {
450     idx = rows[i];
451     found = PETSC_FALSE;
452     for (j=0; j<size; j++) {
453       if (idx >= owners[j] && idx < owners[j+1]) {
454         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
455       }
456     }
457     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
458   }
459   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
460 
461   /* inform other processors of number of messages and max length*/
462   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
463 
464   /* post receives:   */
465   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
466   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
467   for (i=0; i<nrecvs; i++) {
468     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
469   }
470 
471   /* do sends:
472       1) starts[i] gives the starting index in svalues for stuff going to
473          the ith processor
474   */
475   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
476   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
477   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
478   starts[0] = 0;
479   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
480   for (i=0; i<N; i++) {
481     svalues[starts[owner[i]]++] = rows[i];
482   }
483   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
484 
485   starts[0] = 0;
486   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
487   count = 0;
488   for (i=0; i<size; i++) {
489     if (nprocs[2*i+1]) {
490       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
491     }
492   }
493   ierr = PetscFree(starts);CHKERRQ(ierr);
494 
495   base = owners[rank];
496 
497   /*  wait on receives */
498   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
499   source = lens + nrecvs;
500   count  = nrecvs; slen = 0;
501   while (count) {
502     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
503     /* unpack receives into our local space */
504     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
505     source[imdex]  = recv_status.MPI_SOURCE;
506     lens[imdex]    = n;
507     slen          += n;
508     count--;
509   }
510   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
511 
512   /* move the data into the send scatter */
513   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
514   count = 0;
515   for (i=0; i<nrecvs; i++) {
516     values = rvalues + i*nmax;
517     for (j=0; j<lens[i]; j++) {
518       lrows[count++] = values[j] - base;
519     }
520   }
521   ierr = PetscFree(rvalues);CHKERRQ(ierr);
522   ierr = PetscFree(lens);CHKERRQ(ierr);
523   ierr = PetscFree(owner);CHKERRQ(ierr);
524   ierr = PetscFree(nprocs);CHKERRQ(ierr);
525 
526   /* actually zap the local rows */
527   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
528   PetscLogObjectParent(A,istmp);
529 
530   /*
531         Zero the required rows. If the "diagonal block" of the matrix
532      is square and the user wishes to set the diagonal we use seperate
533      code so that MatSetValues() is not called for each diagonal allocating
534      new memory, thus calling lots of mallocs and slowing things down.
535 
536        Contributed by: Mathew Knepley
537   */
538   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
539   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
540   if (diag && (l->A->M == l->A->N)) {
541     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
542   } else if (diag) {
543     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
544     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
545       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
546 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
547     }
548     for (i = 0; i < slen; i++) {
549       row  = lrows[i] + rstart;
550       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
551     }
552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554   } else {
555     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
556   }
557   ierr = ISDestroy(istmp);CHKERRQ(ierr);
558   ierr = PetscFree(lrows);CHKERRQ(ierr);
559 
560   /* wait on sends */
561   if (nsends) {
562     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
563     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
564     ierr = PetscFree(send_status);CHKERRQ(ierr);
565   }
566   ierr = PetscFree(send_waits);CHKERRQ(ierr);
567   ierr = PetscFree(svalues);CHKERRQ(ierr);
568 
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatMult_MPIAIJ"
574 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
575 {
576   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
577   int        ierr,nt;
578 
579   PetscFunctionBegin;
580   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
581   if (nt != A->n) {
582     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
583   }
584   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
586   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatMultAdd_MPIAIJ"
593 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
594 {
595   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
596   int        ierr;
597 
598   PetscFunctionBegin;
599   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
601   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
608 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
609 {
610   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
611   int        ierr;
612 
613   PetscFunctionBegin;
614   /* do nondiagonal part */
615   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
616   /* send it on its way */
617   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
618   /* do local part */
619   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
620   /* receive remote parts: note this assumes the values are not actually */
621   /* inserted in yy until the next line, which is true for my implementation*/
622   /* but is not perhaps always true. */
623   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 EXTERN_C_BEGIN
628 #undef __FUNCT__
629 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
630 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
631 {
632   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
633   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
634   IS         Me,Notme;
635   int        M,N,first,last,*notme,i, ierr;
636 
637   PetscFunctionBegin;
638 
639   /* Easy test: symmetric diagonal block */
640   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
641   ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
642   if (!*f) PetscFunctionReturn(0);
643 
644   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
645   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
646   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
647   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
648   for (i=0; i<first; i++) notme[i] = i;
649   for (i=last; i<M; i++) notme[i-last+first] = i;
650   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
651   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
652   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
653   Aoff = Aoffs[0];
654   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
655   Boff = Boffs[0];
656   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
657   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
658   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
659   ierr = ISDestroy(Me); CHKERRQ(ierr);
660   ierr = ISDestroy(Notme); CHKERRQ(ierr);
661 
662   PetscFunctionReturn(0);
663 }
664 EXTERN_C_END
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
668 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
669 {
670   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
671   int        ierr;
672 
673   PetscFunctionBegin;
674   /* do nondiagonal part */
675   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
676   /* send it on its way */
677   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
678   /* do local part */
679   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
680   /* receive remote parts: note this assumes the values are not actually */
681   /* inserted in yy until the next line, which is true for my implementation*/
682   /* but is not perhaps always true. */
683   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
684   PetscFunctionReturn(0);
685 }
686 
687 /*
688   This only works correctly for square matrices where the subblock A->A is the
689    diagonal block
690 */
691 #undef __FUNCT__
692 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
693 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
694 {
695   int        ierr;
696   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
697 
698   PetscFunctionBegin;
699   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
700   if (a->rstart != a->cstart || a->rend != a->cend) {
701     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
702   }
703   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatScale_MPIAIJ"
709 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
710 {
711   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
712   int        ierr;
713 
714   PetscFunctionBegin;
715   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
716   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNCT__
721 #define __FUNCT__ "MatDestroy_MPIAIJ"
722 int MatDestroy_MPIAIJ(Mat mat)
723 {
724   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
725   int        ierr;
726 
727   PetscFunctionBegin;
728 #if defined(PETSC_USE_LOG)
729   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
730 #endif
731   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
732   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
733   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
734   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
735 #if defined (PETSC_USE_CTABLE)
736   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
737 #else
738   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
739 #endif
740   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
741   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
742   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
743   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
744   ierr = PetscFree(aij);CHKERRQ(ierr);
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatView_MPIAIJ_Binary"
750 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
751 {
752   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
753   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
754   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
755   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
756   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
757   PetscScalar       *column_values;
758 
759   PetscFunctionBegin;
760   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
761   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
762   nz   = A->nz + B->nz;
763   if (rank == 0) {
764     header[0] = MAT_FILE_COOKIE;
765     header[1] = mat->M;
766     header[2] = mat->N;
767     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
768     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
769     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
770     /* get largest number of rows any processor has */
771     rlen = mat->m;
772     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
773     for (i=1; i<size; i++) {
774       rlen = PetscMax(rlen,range[i+1] - range[i]);
775     }
776   } else {
777     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
778     rlen = mat->m;
779   }
780 
781   /* load up the local row counts */
782   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
783   for (i=0; i<mat->m; i++) {
784     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
785   }
786 
787   /* store the row lengths to the file */
788   if (rank == 0) {
789     MPI_Status status;
790     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
791     for (i=1; i<size; i++) {
792       rlen = range[i+1] - range[i];
793       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
794       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
795     }
796   } else {
797     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
798   }
799   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
800 
801   /* load up the local column indices */
802   nzmax = nz; /* )th processor needs space a largest processor needs */
803   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
804   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
805   cnt  = 0;
806   for (i=0; i<mat->m; i++) {
807     for (j=B->i[i]; j<B->i[i+1]; j++) {
808       if ( (col = garray[B->j[j]]) > cstart) break;
809       column_indices[cnt++] = col;
810     }
811     for (k=A->i[i]; k<A->i[i+1]; k++) {
812       column_indices[cnt++] = A->j[k] + cstart;
813     }
814     for (; j<B->i[i+1]; j++) {
815       column_indices[cnt++] = garray[B->j[j]];
816     }
817   }
818   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
819 
820   /* store the column indices to the file */
821   if (rank == 0) {
822     MPI_Status status;
823     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
824     for (i=1; i<size; i++) {
825       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
826       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
827       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
828       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
829     }
830   } else {
831     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
832     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
833   }
834   ierr = PetscFree(column_indices);CHKERRQ(ierr);
835 
836   /* load up the local column values */
837   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
838   cnt  = 0;
839   for (i=0; i<mat->m; i++) {
840     for (j=B->i[i]; j<B->i[i+1]; j++) {
841       if ( garray[B->j[j]] > cstart) break;
842       column_values[cnt++] = B->a[j];
843     }
844     for (k=A->i[i]; k<A->i[i+1]; k++) {
845       column_values[cnt++] = A->a[k];
846     }
847     for (; j<B->i[i+1]; j++) {
848       column_values[cnt++] = B->a[j];
849     }
850   }
851   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
852 
853   /* store the column values to the file */
854   if (rank == 0) {
855     MPI_Status status;
856     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
857     for (i=1; i<size; i++) {
858       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
859       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
860       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
861       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
862     }
863   } else {
864     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
865     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
866   }
867   ierr = PetscFree(column_values);CHKERRQ(ierr);
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
873 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
874 {
875   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
876   int               ierr,rank = aij->rank,size = aij->size;
877   PetscTruth        isdraw,isascii,flg,isbinary;
878   PetscViewer       sviewer;
879   PetscViewerFormat format;
880 
881   PetscFunctionBegin;
882   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
883   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
884   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
885   if (isascii) {
886     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
887     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
888       MatInfo info;
889       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
890       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
891       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
892       if (flg) {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
894 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
895       } else {
896         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
897 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
898       }
899       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
901       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
902       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
903       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
904       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
905       PetscFunctionReturn(0);
906     } else if (format == PETSC_VIEWER_ASCII_INFO) {
907       PetscFunctionReturn(0);
908     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
909       PetscFunctionReturn(0);
910     }
911   } else if (isbinary) {
912     if (size == 1) {
913       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
914       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
915     } else {
916       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
917     }
918     PetscFunctionReturn(0);
919   } else if (isdraw) {
920     PetscDraw  draw;
921     PetscTruth isnull;
922     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
923     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
924   }
925 
926   if (size == 1) {
927     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
928     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
929   } else {
930     /* assemble the entire matrix onto first processor. */
931     Mat         A;
932     Mat_SeqAIJ *Aloc;
933     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
934     PetscScalar *a;
935 
936     if (!rank) {
937       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
938     } else {
939       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
940     }
941     PetscLogObjectParent(mat,A);
942 
943     /* copy over the A part */
944     Aloc = (Mat_SeqAIJ*)aij->A->data;
945     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
946     row = aij->rstart;
947     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
948     for (i=0; i<m; i++) {
949       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
950       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
951     }
952     aj = Aloc->j;
953     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
954 
955     /* copy over the B part */
956     Aloc = (Mat_SeqAIJ*)aij->B->data;
957     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
958     row  = aij->rstart;
959     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
960     ct   = cols;
961     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
962     for (i=0; i<m; i++) {
963       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
964       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
965     }
966     ierr = PetscFree(ct);CHKERRQ(ierr);
967     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
969     /*
970        Everyone has to call to draw the matrix since the graphics waits are
971        synchronized across all processors that share the PetscDraw object
972     */
973     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
974     if (!rank) {
975       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
976       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
977     }
978     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
979     ierr = MatDestroy(A);CHKERRQ(ierr);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatView_MPIAIJ"
986 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
987 {
988   int        ierr;
989   PetscTruth isascii,isdraw,issocket,isbinary;
990 
991   PetscFunctionBegin;
992   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
993   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
994   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
995   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
996   if (isascii || isdraw || isbinary || issocket) {
997     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
998   } else {
999     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1000   }
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatRelax_MPIAIJ"
1008 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1009 {
1010   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1011   int          ierr;
1012   Vec          bb1;
1013   PetscScalar  mone=-1.0;
1014 
1015   PetscFunctionBegin;
1016   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1017 
1018   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1019 
1020   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1021     if (flag & SOR_ZERO_INITIAL_GUESS) {
1022       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1023       its--;
1024     }
1025 
1026     while (its--) {
1027       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1028       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1029 
1030       /* update rhs: bb1 = bb - B*x */
1031       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1032       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1033 
1034       /* local sweep */
1035       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1036       CHKERRQ(ierr);
1037     }
1038   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1039     if (flag & SOR_ZERO_INITIAL_GUESS) {
1040       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1041       its--;
1042     }
1043     while (its--) {
1044       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1045       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1046 
1047       /* update rhs: bb1 = bb - B*x */
1048       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1049       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1050 
1051       /* local sweep */
1052       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1053       CHKERRQ(ierr);
1054     }
1055   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1056     if (flag & SOR_ZERO_INITIAL_GUESS) {
1057       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1058       its--;
1059     }
1060     while (its--) {
1061       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1062       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1063 
1064       /* update rhs: bb1 = bb - B*x */
1065       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1066       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1067 
1068       /* local sweep */
1069       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1070       CHKERRQ(ierr);
1071     }
1072   } else {
1073     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1074   }
1075 
1076   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1082 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1083 {
1084   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1085   Mat        A = mat->A,B = mat->B;
1086   int        ierr;
1087   PetscReal  isend[5],irecv[5];
1088 
1089   PetscFunctionBegin;
1090   info->block_size     = 1.0;
1091   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1092   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1093   isend[3] = info->memory;  isend[4] = info->mallocs;
1094   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1095   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1096   isend[3] += info->memory;  isend[4] += info->mallocs;
1097   if (flag == MAT_LOCAL) {
1098     info->nz_used      = isend[0];
1099     info->nz_allocated = isend[1];
1100     info->nz_unneeded  = isend[2];
1101     info->memory       = isend[3];
1102     info->mallocs      = isend[4];
1103   } else if (flag == MAT_GLOBAL_MAX) {
1104     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1105     info->nz_used      = irecv[0];
1106     info->nz_allocated = irecv[1];
1107     info->nz_unneeded  = irecv[2];
1108     info->memory       = irecv[3];
1109     info->mallocs      = irecv[4];
1110   } else if (flag == MAT_GLOBAL_SUM) {
1111     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1112     info->nz_used      = irecv[0];
1113     info->nz_allocated = irecv[1];
1114     info->nz_unneeded  = irecv[2];
1115     info->memory       = irecv[3];
1116     info->mallocs      = irecv[4];
1117   }
1118   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1119   info->fill_ratio_needed = 0;
1120   info->factor_mallocs    = 0;
1121   info->rows_global       = (double)matin->M;
1122   info->columns_global    = (double)matin->N;
1123   info->rows_local        = (double)matin->m;
1124   info->columns_local     = (double)matin->N;
1125 
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatSetOption_MPIAIJ"
1131 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1132 {
1133   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1134   int        ierr;
1135 
1136   PetscFunctionBegin;
1137   switch (op) {
1138   case MAT_NO_NEW_NONZERO_LOCATIONS:
1139   case MAT_YES_NEW_NONZERO_LOCATIONS:
1140   case MAT_COLUMNS_UNSORTED:
1141   case MAT_COLUMNS_SORTED:
1142   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1143   case MAT_KEEP_ZEROED_ROWS:
1144   case MAT_NEW_NONZERO_LOCATION_ERR:
1145   case MAT_USE_INODES:
1146   case MAT_DO_NOT_USE_INODES:
1147   case MAT_IGNORE_ZERO_ENTRIES:
1148     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1149     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1150     break;
1151   case MAT_ROW_ORIENTED:
1152     a->roworiented = PETSC_TRUE;
1153     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1154     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1155     break;
1156   case MAT_ROWS_SORTED:
1157   case MAT_ROWS_UNSORTED:
1158   case MAT_YES_NEW_DIAGONALS:
1159     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1160     break;
1161   case MAT_COLUMN_ORIENTED:
1162     a->roworiented = PETSC_FALSE;
1163     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1164     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1165     break;
1166   case MAT_IGNORE_OFF_PROC_ENTRIES:
1167     a->donotstash = PETSC_TRUE;
1168     break;
1169   case MAT_NO_NEW_DIAGONALS:
1170     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1171   default:
1172     SETERRQ(PETSC_ERR_SUP,"unknown option");
1173   }
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatGetRow_MPIAIJ"
1179 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1180 {
1181   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1182   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1183   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1184   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1185   int          *cmap,*idx_p;
1186 
1187   PetscFunctionBegin;
1188   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1189   mat->getrowactive = PETSC_TRUE;
1190 
1191   if (!mat->rowvalues && (idx || v)) {
1192     /*
1193         allocate enough space to hold information from the longest row.
1194     */
1195     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1196     int     max = 1,tmp;
1197     for (i=0; i<matin->m; i++) {
1198       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1199       if (max < tmp) { max = tmp; }
1200     }
1201     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1202     mat->rowindices = (int*)(mat->rowvalues + max);
1203   }
1204 
1205   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1206   lrow = row - rstart;
1207 
1208   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1209   if (!v)   {pvA = 0; pvB = 0;}
1210   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1211   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1212   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1213   nztot = nzA + nzB;
1214 
1215   cmap  = mat->garray;
1216   if (v  || idx) {
1217     if (nztot) {
1218       /* Sort by increasing column numbers, assuming A and B already sorted */
1219       int imark = -1;
1220       if (v) {
1221         *v = v_p = mat->rowvalues;
1222         for (i=0; i<nzB; i++) {
1223           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1224           else break;
1225         }
1226         imark = i;
1227         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1228         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1229       }
1230       if (idx) {
1231         *idx = idx_p = mat->rowindices;
1232         if (imark > -1) {
1233           for (i=0; i<imark; i++) {
1234             idx_p[i] = cmap[cworkB[i]];
1235           }
1236         } else {
1237           for (i=0; i<nzB; i++) {
1238             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1239             else break;
1240           }
1241           imark = i;
1242         }
1243         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1244         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1245       }
1246     } else {
1247       if (idx) *idx = 0;
1248       if (v)   *v   = 0;
1249     }
1250   }
1251   *nz = nztot;
1252   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1253   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1259 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1260 {
1261   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1262 
1263   PetscFunctionBegin;
1264   if (aij->getrowactive == PETSC_FALSE) {
1265     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1266   }
1267   aij->getrowactive = PETSC_FALSE;
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatNorm_MPIAIJ"
1273 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1274 {
1275   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1276   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1277   int          ierr,i,j,cstart = aij->cstart;
1278   PetscReal    sum = 0.0;
1279   PetscScalar  *v;
1280 
1281   PetscFunctionBegin;
1282   if (aij->size == 1) {
1283     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1284   } else {
1285     if (type == NORM_FROBENIUS) {
1286       v = amat->a;
1287       for (i=0; i<amat->nz; i++) {
1288 #if defined(PETSC_USE_COMPLEX)
1289         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1290 #else
1291         sum += (*v)*(*v); v++;
1292 #endif
1293       }
1294       v = bmat->a;
1295       for (i=0; i<bmat->nz; i++) {
1296 #if defined(PETSC_USE_COMPLEX)
1297         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1298 #else
1299         sum += (*v)*(*v); v++;
1300 #endif
1301       }
1302       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1303       *norm = sqrt(*norm);
1304     } else if (type == NORM_1) { /* max column norm */
1305       PetscReal *tmp,*tmp2;
1306       int    *jj,*garray = aij->garray;
1307       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1308       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1309       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1310       *norm = 0.0;
1311       v = amat->a; jj = amat->j;
1312       for (j=0; j<amat->nz; j++) {
1313         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1314       }
1315       v = bmat->a; jj = bmat->j;
1316       for (j=0; j<bmat->nz; j++) {
1317         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1318       }
1319       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1320       for (j=0; j<mat->N; j++) {
1321         if (tmp2[j] > *norm) *norm = tmp2[j];
1322       }
1323       ierr = PetscFree(tmp);CHKERRQ(ierr);
1324       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1325     } else if (type == NORM_INFINITY) { /* max row norm */
1326       PetscReal ntemp = 0.0;
1327       for (j=0; j<aij->A->m; j++) {
1328         v = amat->a + amat->i[j];
1329         sum = 0.0;
1330         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1331           sum += PetscAbsScalar(*v); v++;
1332         }
1333         v = bmat->a + bmat->i[j];
1334         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1335           sum += PetscAbsScalar(*v); v++;
1336         }
1337         if (sum > ntemp) ntemp = sum;
1338       }
1339       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1340     } else {
1341       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1342     }
1343   }
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatTranspose_MPIAIJ"
1349 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1350 {
1351   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1352   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1353   int          ierr;
1354   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1355   Mat          B;
1356   PetscScalar  *array;
1357 
1358   PetscFunctionBegin;
1359   if (!matout && M != N) {
1360     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1361   }
1362 
1363   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1364 
1365   /* copy over the A part */
1366   Aloc = (Mat_SeqAIJ*)a->A->data;
1367   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1368   row = a->rstart;
1369   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1370   for (i=0; i<m; i++) {
1371     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1372     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1373   }
1374   aj = Aloc->j;
1375   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1376 
1377   /* copy over the B part */
1378   Aloc = (Mat_SeqAIJ*)a->B->data;
1379   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1380   row  = a->rstart;
1381   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1382   ct   = cols;
1383   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1384   for (i=0; i<m; i++) {
1385     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1386     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1387   }
1388   ierr = PetscFree(ct);CHKERRQ(ierr);
1389   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391   if (matout) {
1392     *matout = B;
1393   } else {
1394     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1401 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1402 {
1403   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1404   Mat        a = aij->A,b = aij->B;
1405   int        ierr,s1,s2,s3;
1406 
1407   PetscFunctionBegin;
1408   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1409   if (rr) {
1410     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1411     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1412     /* Overlap communication with computation. */
1413     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1414   }
1415   if (ll) {
1416     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1417     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1418     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1419   }
1420   /* scale  the diagonal block */
1421   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1422 
1423   if (rr) {
1424     /* Do a scatter end and then right scale the off-diagonal block */
1425     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1426     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1427   }
1428 
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1435 int MatPrintHelp_MPIAIJ(Mat A)
1436 {
1437   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1438   int        ierr;
1439 
1440   PetscFunctionBegin;
1441   if (!a->rank) {
1442     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1443   }
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1449 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1450 {
1451   PetscFunctionBegin;
1452   *bs = 1;
1453   PetscFunctionReturn(0);
1454 }
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1457 int MatSetUnfactored_MPIAIJ(Mat A)
1458 {
1459   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1460   int        ierr;
1461 
1462   PetscFunctionBegin;
1463   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatEqual_MPIAIJ"
1469 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1470 {
1471   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1472   Mat        a,b,c,d;
1473   PetscTruth flg;
1474   int        ierr;
1475 
1476   PetscFunctionBegin;
1477   a = matA->A; b = matA->B;
1478   c = matB->A; d = matB->B;
1479 
1480   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1481   if (flg == PETSC_TRUE) {
1482     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1483   }
1484   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatCopy_MPIAIJ"
1490 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1491 {
1492   int        ierr;
1493   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1494   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1495 
1496   PetscFunctionBegin;
1497   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1498   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1499     /* because of the column compression in the off-processor part of the matrix a->B,
1500        the number of columns in a->B and b->B may be different, hence we cannot call
1501        the MatCopy() directly on the two parts. If need be, we can provide a more
1502        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1503        then copying the submatrices */
1504     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1505   } else {
1506     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1507     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1508   }
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1514 int MatSetUpPreallocation_MPIAIJ(Mat A)
1515 {
1516   int        ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1524 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS [],int);
1525 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1526 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
1527 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1528 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1529 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1530 #endif
1531 
1532 #include "petscblaslapack.h"
1533 extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure);
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatAXPY_MPIAIJ"
1536 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1537 {
1538   int        ierr,one=1,i;
1539   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1540   Mat_SeqAIJ *x,*y;
1541 
1542   PetscFunctionBegin;
1543   if (str == SAME_NONZERO_PATTERN) {
1544     x = (Mat_SeqAIJ *)xx->A->data;
1545     y = (Mat_SeqAIJ *)yy->A->data;
1546     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1547     x = (Mat_SeqAIJ *)xx->B->data;
1548     y = (Mat_SeqAIJ *)yy->B->data;
1549     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1550   } else if (str == SUBSET_NONZERO_PATTERN) {
1551     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1552 
1553     x = (Mat_SeqAIJ *)xx->B->data;
1554     y = (Mat_SeqAIJ *)yy->B->data;
1555     if (y->xtoy && y->XtoY != xx->B) {
1556       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1557       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1558     }
1559     if (!y->xtoy) { /* get xtoy */
1560       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1561       y->XtoY = xx->B;
1562     }
1563     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1564   } else {
1565     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 /* -------------------------------------------------------------------*/
1571 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1572        MatGetRow_MPIAIJ,
1573        MatRestoreRow_MPIAIJ,
1574        MatMult_MPIAIJ,
1575        MatMultAdd_MPIAIJ,
1576        MatMultTranspose_MPIAIJ,
1577        MatMultTransposeAdd_MPIAIJ,
1578        0,
1579        0,
1580        0,
1581        0,
1582        0,
1583        0,
1584        MatRelax_MPIAIJ,
1585        MatTranspose_MPIAIJ,
1586        MatGetInfo_MPIAIJ,
1587        MatEqual_MPIAIJ,
1588        MatGetDiagonal_MPIAIJ,
1589        MatDiagonalScale_MPIAIJ,
1590        MatNorm_MPIAIJ,
1591        MatAssemblyBegin_MPIAIJ,
1592        MatAssemblyEnd_MPIAIJ,
1593        0,
1594        MatSetOption_MPIAIJ,
1595        MatZeroEntries_MPIAIJ,
1596        MatZeroRows_MPIAIJ,
1597 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1598        MatLUFactorSymbolic_MPIAIJ_TFS,
1599 #else
1600        0,
1601 #endif
1602        0,
1603        0,
1604        0,
1605        MatSetUpPreallocation_MPIAIJ,
1606        0,
1607        0,
1608        0,
1609        0,
1610        MatDuplicate_MPIAIJ,
1611        0,
1612        0,
1613        0,
1614        0,
1615        MatAXPY_MPIAIJ,
1616        MatGetSubMatrices_MPIAIJ,
1617        MatIncreaseOverlap_MPIAIJ,
1618        MatGetValues_MPIAIJ,
1619        MatCopy_MPIAIJ,
1620        MatPrintHelp_MPIAIJ,
1621        MatScale_MPIAIJ,
1622        0,
1623        0,
1624        0,
1625        MatGetBlockSize_MPIAIJ,
1626        0,
1627        0,
1628        0,
1629        0,
1630        MatFDColoringCreate_MPIAIJ,
1631        0,
1632        MatSetUnfactored_MPIAIJ,
1633        0,
1634        0,
1635        MatGetSubMatrix_MPIAIJ,
1636        MatDestroy_MPIAIJ,
1637        MatView_MPIAIJ,
1638        MatGetPetscMaps_Petsc,
1639        0,
1640        0,
1641        0,
1642        0,
1643        0,
1644        0,
1645        0,
1646        0,
1647        MatSetColoring_MPIAIJ,
1648        MatSetValuesAdic_MPIAIJ,
1649        MatSetValuesAdifor_MPIAIJ
1650 };
1651 
1652 /* ----------------------------------------------------------------------------------------*/
1653 
1654 EXTERN_C_BEGIN
1655 #undef __FUNCT__
1656 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1657 int MatStoreValues_MPIAIJ(Mat mat)
1658 {
1659   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1660   int        ierr;
1661 
1662   PetscFunctionBegin;
1663   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1664   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1665   PetscFunctionReturn(0);
1666 }
1667 EXTERN_C_END
1668 
1669 EXTERN_C_BEGIN
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1672 int MatRetrieveValues_MPIAIJ(Mat mat)
1673 {
1674   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1675   int        ierr;
1676 
1677   PetscFunctionBegin;
1678   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1679   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 EXTERN_C_END
1683 
1684 #include "petscpc.h"
1685 EXTERN_C_BEGIN
1686 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1687 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec);
1688 EXTERN_C_END
1689 
1690 EXTERN_C_BEGIN
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1693 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1694 {
1695   Mat_MPIAIJ   *b;
1696   int          ierr,i;
1697 
1698   PetscFunctionBegin;
1699   B->preallocated = PETSC_TRUE;
1700   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1701   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1702   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1703   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1704   if (d_nnz) {
1705     for (i=0; i<B->m; i++) {
1706       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1707     }
1708   }
1709   if (o_nnz) {
1710     for (i=0; i<B->m; i++) {
1711       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1712     }
1713   }
1714   b = (Mat_MPIAIJ*)B->data;
1715 
1716   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1717   PetscLogObjectParent(B,b->A);
1718   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1719   PetscLogObjectParent(B,b->B);
1720 
1721   PetscFunctionReturn(0);
1722 }
1723 EXTERN_C_END
1724 
1725 /*MC
1726    MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1727 
1728    Options Database Keys:
1729 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1730 
1731   Level: beginner
1732 
1733 .seealso: MatCreateMPIAIJ
1734 M*/
1735 
1736 EXTERN_C_BEGIN
1737 #undef __FUNCT__
1738 #define __FUNCT__ "MatCreate_MPIAIJ"
1739 int MatCreate_MPIAIJ(Mat B)
1740 {
1741   Mat_MPIAIJ *b;
1742   int        ierr,i,size;
1743 
1744   PetscFunctionBegin;
1745   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1746 
1747   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1748   B->data         = (void*)b;
1749   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1750   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1751   B->factor       = 0;
1752   B->assembled    = PETSC_FALSE;
1753   B->mapping      = 0;
1754 
1755   B->insertmode      = NOT_SET_VALUES;
1756   b->size            = size;
1757   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1758 
1759   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1760   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1761 
1762   /* the information in the maps duplicates the information computed below, eventually
1763      we should remove the duplicate information that is not contained in the maps */
1764   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1765   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1766 
1767   /* build local table of row and column ownerships */
1768   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1769   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1770   b->cowners = b->rowners + b->size + 2;
1771   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1772   b->rowners[0] = 0;
1773   for (i=2; i<=b->size; i++) {
1774     b->rowners[i] += b->rowners[i-1];
1775   }
1776   b->rstart = b->rowners[b->rank];
1777   b->rend   = b->rowners[b->rank+1];
1778   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1779   b->cowners[0] = 0;
1780   for (i=2; i<=b->size; i++) {
1781     b->cowners[i] += b->cowners[i-1];
1782   }
1783   b->cstart = b->cowners[b->rank];
1784   b->cend   = b->cowners[b->rank+1];
1785 
1786   /* build cache for off array entries formed */
1787   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1788   b->donotstash  = PETSC_FALSE;
1789   b->colmap      = 0;
1790   b->garray      = 0;
1791   b->roworiented = PETSC_TRUE;
1792 
1793   /* stuff used for matrix vector multiply */
1794   b->lvec      = PETSC_NULL;
1795   b->Mvctx     = PETSC_NULL;
1796 
1797   /* stuff for MatGetRow() */
1798   b->rowindices   = 0;
1799   b->rowvalues    = 0;
1800   b->getrowactive = PETSC_FALSE;
1801 
1802   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1803                                      "MatStoreValues_MPIAIJ",
1804                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1805   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1806                                      "MatRetrieveValues_MPIAIJ",
1807                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1808   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1809 				     "MatGetDiagonalBlock_MPIAIJ",
1810                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1811   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1812 				     "MatIsSymmetric_MPIAIJ",
1813 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1815 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1816 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1817   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1818 				     "MatDiagonalScaleLocal_MPIAIJ",
1819 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1820   PetscFunctionReturn(0);
1821 }
1822 EXTERN_C_END
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1826 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1827 {
1828   Mat        mat;
1829   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1830   int        ierr;
1831 
1832   PetscFunctionBegin;
1833   *newmat       = 0;
1834   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1835   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1836   a    = (Mat_MPIAIJ*)mat->data;
1837   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1838   mat->factor       = matin->factor;
1839   mat->assembled    = PETSC_TRUE;
1840   mat->insertmode   = NOT_SET_VALUES;
1841   mat->preallocated = PETSC_TRUE;
1842 
1843   a->rstart       = oldmat->rstart;
1844   a->rend         = oldmat->rend;
1845   a->cstart       = oldmat->cstart;
1846   a->cend         = oldmat->cend;
1847   a->size         = oldmat->size;
1848   a->rank         = oldmat->rank;
1849   a->donotstash   = oldmat->donotstash;
1850   a->roworiented  = oldmat->roworiented;
1851   a->rowindices   = 0;
1852   a->rowvalues    = 0;
1853   a->getrowactive = PETSC_FALSE;
1854 
1855   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1856   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1857   if (oldmat->colmap) {
1858 #if defined (PETSC_USE_CTABLE)
1859     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1860 #else
1861     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1862     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1863     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1864 #endif
1865   } else a->colmap = 0;
1866   if (oldmat->garray) {
1867     int len;
1868     len  = oldmat->B->n;
1869     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1870     PetscLogObjectMemory(mat,len*sizeof(int));
1871     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1872   } else a->garray = 0;
1873 
1874   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1875   PetscLogObjectParent(mat,a->lvec);
1876   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1877   PetscLogObjectParent(mat,a->Mvctx);
1878   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1879   PetscLogObjectParent(mat,a->A);
1880   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1881   PetscLogObjectParent(mat,a->B);
1882   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1883   *newmat = mat;
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 #include "petscsys.h"
1888 
1889 EXTERN_C_BEGIN
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatLoad_MPIAIJ"
1892 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1893 {
1894   Mat          A;
1895   PetscScalar  *vals,*svals;
1896   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1897   MPI_Status   status;
1898   int          i,nz,ierr,j,rstart,rend,fd;
1899   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1900   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1901   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1902 
1903   PetscFunctionBegin;
1904   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1905   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1906   if (!rank) {
1907     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1908     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1909     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1910     if (header[3] < 0) {
1911       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1912     }
1913   }
1914 
1915   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1916   M = header[1]; N = header[2];
1917   /* determine ownership of all rows */
1918   m = M/size + ((M % size) > rank);
1919   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1920   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1921   rowners[0] = 0;
1922   for (i=2; i<=size; i++) {
1923     rowners[i] += rowners[i-1];
1924   }
1925   rstart = rowners[rank];
1926   rend   = rowners[rank+1];
1927 
1928   /* distribute row lengths to all processors */
1929   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1930   offlens = ourlens + (rend-rstart);
1931   if (!rank) {
1932     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1933     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1934     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1935     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1936     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1937     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1938   } else {
1939     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1940   }
1941 
1942   if (!rank) {
1943     /* calculate the number of nonzeros on each processor */
1944     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1945     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1946     for (i=0; i<size; i++) {
1947       for (j=rowners[i]; j< rowners[i+1]; j++) {
1948         procsnz[i] += rowlengths[j];
1949       }
1950     }
1951     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1952 
1953     /* determine max buffer needed and allocate it */
1954     maxnz = 0;
1955     for (i=0; i<size; i++) {
1956       maxnz = PetscMax(maxnz,procsnz[i]);
1957     }
1958     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1959 
1960     /* read in my part of the matrix column indices  */
1961     nz   = procsnz[0];
1962     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1963     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1964 
1965     /* read in every one elses and ship off */
1966     for (i=1; i<size; i++) {
1967       nz   = procsnz[i];
1968       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1969       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1970     }
1971     ierr = PetscFree(cols);CHKERRQ(ierr);
1972   } else {
1973     /* determine buffer space needed for message */
1974     nz = 0;
1975     for (i=0; i<m; i++) {
1976       nz += ourlens[i];
1977     }
1978     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1979 
1980     /* receive message of column indices*/
1981     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1982     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1983     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1984   }
1985 
1986   /* determine column ownership if matrix is not square */
1987   if (N != M) {
1988     n      = N/size + ((N % size) > rank);
1989     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1990     cstart = cend - n;
1991   } else {
1992     cstart = rstart;
1993     cend   = rend;
1994     n      = cend - cstart;
1995   }
1996 
1997   /* loop over local rows, determining number of off diagonal entries */
1998   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1999   jj = 0;
2000   for (i=0; i<m; i++) {
2001     for (j=0; j<ourlens[i]; j++) {
2002       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2003       jj++;
2004     }
2005   }
2006 
2007   /* create our matrix */
2008   for (i=0; i<m; i++) {
2009     ourlens[i] -= offlens[i];
2010   }
2011   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2012   ierr = MatSetType(A,type);CHKERRQ(ierr);
2013   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2014 
2015   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2016   for (i=0; i<m; i++) {
2017     ourlens[i] += offlens[i];
2018   }
2019 
2020   if (!rank) {
2021     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2022 
2023     /* read in my part of the matrix numerical values  */
2024     nz   = procsnz[0];
2025     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2026 
2027     /* insert into matrix */
2028     jj      = rstart;
2029     smycols = mycols;
2030     svals   = vals;
2031     for (i=0; i<m; i++) {
2032       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2033       smycols += ourlens[i];
2034       svals   += ourlens[i];
2035       jj++;
2036     }
2037 
2038     /* read in other processors and ship out */
2039     for (i=1; i<size; i++) {
2040       nz   = procsnz[i];
2041       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2042       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2043     }
2044     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2045   } else {
2046     /* receive numeric values */
2047     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2048 
2049     /* receive message of values*/
2050     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2051     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2052     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2053 
2054     /* insert into matrix */
2055     jj      = rstart;
2056     smycols = mycols;
2057     svals   = vals;
2058     for (i=0; i<m; i++) {
2059       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2060       smycols += ourlens[i];
2061       svals   += ourlens[i];
2062       jj++;
2063     }
2064   }
2065   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2066   ierr = PetscFree(vals);CHKERRQ(ierr);
2067   ierr = PetscFree(mycols);CHKERRQ(ierr);
2068   ierr = PetscFree(rowners);CHKERRQ(ierr);
2069 
2070   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2071   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2072   *newmat = A;
2073   PetscFunctionReturn(0);
2074 }
2075 EXTERN_C_END
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2079 /*
2080     Not great since it makes two copies of the submatrix, first an SeqAIJ
2081   in local and then by concatenating the local matrices the end result.
2082   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2083 */
2084 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2085 {
2086   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2087   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2088   Mat          *local,M,Mreuse;
2089   PetscScalar  *vwork,*aa;
2090   MPI_Comm     comm = mat->comm;
2091   Mat_SeqAIJ   *aij;
2092 
2093 
2094   PetscFunctionBegin;
2095   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2096   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2097 
2098   if (call ==  MAT_REUSE_MATRIX) {
2099     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2100     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2101     local = &Mreuse;
2102     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2103   } else {
2104     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2105     Mreuse = *local;
2106     ierr   = PetscFree(local);CHKERRQ(ierr);
2107   }
2108 
2109   /*
2110       m - number of local rows
2111       n - number of columns (same on all processors)
2112       rstart - first row in new global matrix generated
2113   */
2114   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2115   if (call == MAT_INITIAL_MATRIX) {
2116     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2117     ii  = aij->i;
2118     jj  = aij->j;
2119 
2120     /*
2121         Determine the number of non-zeros in the diagonal and off-diagonal
2122         portions of the matrix in order to do correct preallocation
2123     */
2124 
2125     /* first get start and end of "diagonal" columns */
2126     if (csize == PETSC_DECIDE) {
2127       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2128       if (mglobal == n) { /* square matrix */
2129 	nlocal = m;
2130       } else {
2131         nlocal = n/size + ((n % size) > rank);
2132       }
2133     } else {
2134       nlocal = csize;
2135     }
2136     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2137     rstart = rend - nlocal;
2138     if (rank == size - 1 && rend != n) {
2139       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2140     }
2141 
2142     /* next, compute all the lengths */
2143     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2144     olens = dlens + m;
2145     for (i=0; i<m; i++) {
2146       jend = ii[i+1] - ii[i];
2147       olen = 0;
2148       dlen = 0;
2149       for (j=0; j<jend; j++) {
2150         if (*jj < rstart || *jj >= rend) olen++;
2151         else dlen++;
2152         jj++;
2153       }
2154       olens[i] = olen;
2155       dlens[i] = dlen;
2156     }
2157     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2158     ierr = PetscFree(dlens);CHKERRQ(ierr);
2159   } else {
2160     int ml,nl;
2161 
2162     M = *newmat;
2163     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2164     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2165     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2166     /*
2167          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2168        rather than the slower MatSetValues().
2169     */
2170     M->was_assembled = PETSC_TRUE;
2171     M->assembled     = PETSC_FALSE;
2172   }
2173   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2174   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2175   ii  = aij->i;
2176   jj  = aij->j;
2177   aa  = aij->a;
2178   for (i=0; i<m; i++) {
2179     row   = rstart + i;
2180     nz    = ii[i+1] - ii[i];
2181     cwork = jj;     jj += nz;
2182     vwork = aa;     aa += nz;
2183     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2184   }
2185 
2186   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2187   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2188   *newmat = M;
2189 
2190   /* save submatrix used in processor for next request */
2191   if (call ==  MAT_INITIAL_MATRIX) {
2192     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2193     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2194   }
2195 
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 #undef __FUNCT__
2200 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2201 /*@C
2202    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2203    (the default parallel PETSc format).  For good matrix assembly performance
2204    the user should preallocate the matrix storage by setting the parameters
2205    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2206    performance can be increased by more than a factor of 50.
2207 
2208    Collective on MPI_Comm
2209 
2210    Input Parameters:
2211 +  A - the matrix
2212 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2213            (same value is used for all local rows)
2214 .  d_nnz - array containing the number of nonzeros in the various rows of the
2215            DIAGONAL portion of the local submatrix (possibly different for each row)
2216            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2217            The size of this array is equal to the number of local rows, i.e 'm'.
2218            You must leave room for the diagonal entry even if it is zero.
2219 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2220            submatrix (same value is used for all local rows).
2221 -  o_nnz - array containing the number of nonzeros in the various rows of the
2222            OFF-DIAGONAL portion of the local submatrix (possibly different for
2223            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2224            structure. The size of this array is equal to the number
2225            of local rows, i.e 'm'.
2226 
2227    The AIJ format (also called the Yale sparse matrix format or
2228    compressed row storage), is fully compatible with standard Fortran 77
2229    storage.  That is, the stored row and column indices can begin at
2230    either one (as in Fortran) or zero.  See the users manual for details.
2231 
2232    The user MUST specify either the local or global matrix dimensions
2233    (possibly both).
2234 
2235    The parallel matrix is partitioned such that the first m0 rows belong to
2236    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2237    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2238 
2239    The DIAGONAL portion of the local submatrix of a processor can be defined
2240    as the submatrix which is obtained by extraction the part corresponding
2241    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2242    first row that belongs to the processor, and r2 is the last row belonging
2243    to the this processor. This is a square mxm matrix. The remaining portion
2244    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2245 
2246    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2247 
2248    By default, this format uses inodes (identical nodes) when possible.
2249    We search for consecutive rows with the same nonzero structure, thereby
2250    reusing matrix information to achieve increased efficiency.
2251 
2252    Options Database Keys:
2253 +  -mat_aij_no_inode  - Do not use inodes
2254 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2255 -  -mat_aij_oneindex - Internally use indexing starting at 1
2256         rather than 0.  Note that when calling MatSetValues(),
2257         the user still MUST index entries starting at 0!
2258 
2259    Example usage:
2260 
2261    Consider the following 8x8 matrix with 34 non-zero values, that is
2262    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2263    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2264    as follows:
2265 
2266 .vb
2267             1  2  0  |  0  3  0  |  0  4
2268     Proc0   0  5  6  |  7  0  0  |  8  0
2269             9  0 10  | 11  0  0  | 12  0
2270     -------------------------------------
2271            13  0 14  | 15 16 17  |  0  0
2272     Proc1   0 18  0  | 19 20 21  |  0  0
2273             0  0  0  | 22 23  0  | 24  0
2274     -------------------------------------
2275     Proc2  25 26 27  |  0  0 28  | 29  0
2276            30  0  0  | 31 32 33  |  0 34
2277 .ve
2278 
2279    This can be represented as a collection of submatrices as:
2280 
2281 .vb
2282       A B C
2283       D E F
2284       G H I
2285 .ve
2286 
2287    Where the submatrices A,B,C are owned by proc0, D,E,F are
2288    owned by proc1, G,H,I are owned by proc2.
2289 
2290    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2291    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2292    The 'M','N' parameters are 8,8, and have the same values on all procs.
2293 
2294    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2295    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2296    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2297    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2298    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2299    matrix, ans [DF] as another SeqAIJ matrix.
2300 
2301    When d_nz, o_nz parameters are specified, d_nz storage elements are
2302    allocated for every row of the local diagonal submatrix, and o_nz
2303    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2304    One way to choose d_nz and o_nz is to use the max nonzerors per local
2305    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2306    In this case, the values of d_nz,o_nz are:
2307 .vb
2308      proc0 : dnz = 2, o_nz = 2
2309      proc1 : dnz = 3, o_nz = 2
2310      proc2 : dnz = 1, o_nz = 4
2311 .ve
2312    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2313    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2314    for proc3. i.e we are using 12+15+10=37 storage locations to store
2315    34 values.
2316 
2317    When d_nnz, o_nnz parameters are specified, the storage is specified
2318    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2319    In the above case the values for d_nnz,o_nnz are:
2320 .vb
2321      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2322      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2323      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2324 .ve
2325    Here the space allocated is sum of all the above values i.e 34, and
2326    hence pre-allocation is perfect.
2327 
2328    Level: intermediate
2329 
2330 .keywords: matrix, aij, compressed row, sparse, parallel
2331 
2332 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2333 @*/
2334 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2335 {
2336   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2337 
2338   PetscFunctionBegin;
2339   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2340   if (f) {
2341     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2342   }
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatCreateMPIAIJ"
2348 /*@C
2349    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2350    (the default parallel PETSc format).  For good matrix assembly performance
2351    the user should preallocate the matrix storage by setting the parameters
2352    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2353    performance can be increased by more than a factor of 50.
2354 
2355    Collective on MPI_Comm
2356 
2357    Input Parameters:
2358 +  comm - MPI communicator
2359 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2360            This value should be the same as the local size used in creating the
2361            y vector for the matrix-vector product y = Ax.
2362 .  n - This value should be the same as the local size used in creating the
2363        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2364        calculated if N is given) For square matrices n is almost always m.
2365 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2366 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2367 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2368            (same value is used for all local rows)
2369 .  d_nnz - array containing the number of nonzeros in the various rows of the
2370            DIAGONAL portion of the local submatrix (possibly different for each row)
2371            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2372            The size of this array is equal to the number of local rows, i.e 'm'.
2373            You must leave room for the diagonal entry even if it is zero.
2374 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2375            submatrix (same value is used for all local rows).
2376 -  o_nnz - array containing the number of nonzeros in the various rows of the
2377            OFF-DIAGONAL portion of the local submatrix (possibly different for
2378            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2379            structure. The size of this array is equal to the number
2380            of local rows, i.e 'm'.
2381 
2382    Output Parameter:
2383 .  A - the matrix
2384 
2385    Notes:
2386    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2387    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2388    storage requirements for this matrix.
2389 
2390    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2391    processor than it must be used on all processors that share the object for
2392    that argument.
2393 
2394    The AIJ format (also called the Yale sparse matrix format or
2395    compressed row storage), is fully compatible with standard Fortran 77
2396    storage.  That is, the stored row and column indices can begin at
2397    either one (as in Fortran) or zero.  See the users manual for details.
2398 
2399    The user MUST specify either the local or global matrix dimensions
2400    (possibly both).
2401 
2402    The parallel matrix is partitioned such that the first m0 rows belong to
2403    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2404    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2405 
2406    The DIAGONAL portion of the local submatrix of a processor can be defined
2407    as the submatrix which is obtained by extraction the part corresponding
2408    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2409    first row that belongs to the processor, and r2 is the last row belonging
2410    to the this processor. This is a square mxm matrix. The remaining portion
2411    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2412 
2413    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2414 
2415    When calling this routine with a single process communicator, a matrix of
2416    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2417    type of communicator, use the construction mechanism:
2418      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2419 
2420    By default, this format uses inodes (identical nodes) when possible.
2421    We search for consecutive rows with the same nonzero structure, thereby
2422    reusing matrix information to achieve increased efficiency.
2423 
2424    Options Database Keys:
2425 +  -mat_aij_no_inode  - Do not use inodes
2426 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2427 -  -mat_aij_oneindex - Internally use indexing starting at 1
2428         rather than 0.  Note that when calling MatSetValues(),
2429         the user still MUST index entries starting at 0!
2430 
2431 
2432    Example usage:
2433 
2434    Consider the following 8x8 matrix with 34 non-zero values, that is
2435    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2436    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2437    as follows:
2438 
2439 .vb
2440             1  2  0  |  0  3  0  |  0  4
2441     Proc0   0  5  6  |  7  0  0  |  8  0
2442             9  0 10  | 11  0  0  | 12  0
2443     -------------------------------------
2444            13  0 14  | 15 16 17  |  0  0
2445     Proc1   0 18  0  | 19 20 21  |  0  0
2446             0  0  0  | 22 23  0  | 24  0
2447     -------------------------------------
2448     Proc2  25 26 27  |  0  0 28  | 29  0
2449            30  0  0  | 31 32 33  |  0 34
2450 .ve
2451 
2452    This can be represented as a collection of submatrices as:
2453 
2454 .vb
2455       A B C
2456       D E F
2457       G H I
2458 .ve
2459 
2460    Where the submatrices A,B,C are owned by proc0, D,E,F are
2461    owned by proc1, G,H,I are owned by proc2.
2462 
2463    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2464    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2465    The 'M','N' parameters are 8,8, and have the same values on all procs.
2466 
2467    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2468    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2469    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2470    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2471    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2472    matrix, ans [DF] as another SeqAIJ matrix.
2473 
2474    When d_nz, o_nz parameters are specified, d_nz storage elements are
2475    allocated for every row of the local diagonal submatrix, and o_nz
2476    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2477    One way to choose d_nz and o_nz is to use the max nonzerors per local
2478    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2479    In this case, the values of d_nz,o_nz are:
2480 .vb
2481      proc0 : dnz = 2, o_nz = 2
2482      proc1 : dnz = 3, o_nz = 2
2483      proc2 : dnz = 1, o_nz = 4
2484 .ve
2485    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2486    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2487    for proc3. i.e we are using 12+15+10=37 storage locations to store
2488    34 values.
2489 
2490    When d_nnz, o_nnz parameters are specified, the storage is specified
2491    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2492    In the above case the values for d_nnz,o_nnz are:
2493 .vb
2494      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2495      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2496      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2497 .ve
2498    Here the space allocated is sum of all the above values i.e 34, and
2499    hence pre-allocation is perfect.
2500 
2501    Level: intermediate
2502 
2503 .keywords: matrix, aij, compressed row, sparse, parallel
2504 
2505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2506 @*/
2507 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2508 {
2509   int ierr,size;
2510 
2511   PetscFunctionBegin;
2512   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2513   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2514   if (size > 1) {
2515     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2516     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2517   } else {
2518     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2519     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2520   }
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2526 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2527 {
2528   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2529   PetscFunctionBegin;
2530   *Ad     = a->A;
2531   *Ao     = a->B;
2532   *colmap = a->garray;
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2538 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2539 {
2540   int        ierr,i;
2541   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2542 
2543   PetscFunctionBegin;
2544   if (coloring->ctype == IS_COLORING_LOCAL) {
2545     ISColoringValue *allcolors,*colors;
2546     ISColoring      ocoloring;
2547 
2548     /* set coloring for diagonal portion */
2549     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2550 
2551     /* set coloring for off-diagonal portion */
2552     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2553     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2554     for (i=0; i<a->B->n; i++) {
2555       colors[i] = allcolors[a->garray[i]];
2556     }
2557     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2558     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2559     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2560     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2561   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2562     ISColoringValue *colors;
2563     int             *larray;
2564     ISColoring      ocoloring;
2565 
2566     /* set coloring for diagonal portion */
2567     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2568     for (i=0; i<a->A->n; i++) {
2569       larray[i] = i + a->cstart;
2570     }
2571     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2572     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2573     for (i=0; i<a->A->n; i++) {
2574       colors[i] = coloring->colors[larray[i]];
2575     }
2576     ierr = PetscFree(larray);CHKERRQ(ierr);
2577     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2578     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2579     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2580 
2581     /* set coloring for off-diagonal portion */
2582     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2583     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2584     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2585     for (i=0; i<a->B->n; i++) {
2586       colors[i] = coloring->colors[larray[i]];
2587     }
2588     ierr = PetscFree(larray);CHKERRQ(ierr);
2589     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2590     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2591     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2592   } else {
2593     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2594   }
2595 
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 #undef __FUNCT__
2600 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2601 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2602 {
2603   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2604   int        ierr;
2605 
2606   PetscFunctionBegin;
2607   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2608   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 #undef __FUNCT__
2613 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2614 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2615 {
2616   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2617   int        ierr;
2618 
2619   PetscFunctionBegin;
2620   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2621   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2622   PetscFunctionReturn(0);
2623 }
2624 
2625 #undef __FUNCT__
2626 #define __FUNCT__ "MatMerge"
2627 /*@C
2628       MatMerge - Creates a single large PETSc matrix by concatinating sequential
2629                  matrices from each processor
2630 
2631     Collective on MPI_Comm
2632 
2633    Input Parameters:
2634 +    comm - the communicators the parallel matrix will live on
2635 -    inmat - the input sequential matrices
2636 
2637    Output Parameter:
2638 .    outmat - the parallel matrix generated
2639 
2640     Level: advanced
2641 
2642    Notes: The number of columns of the matrix in EACH of the seperate files
2643       MUST be the same.
2644 
2645 @*/
2646 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2647 {
2648   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2649   PetscScalar *values;
2650   PetscMap    columnmap,rowmap;
2651 
2652   PetscFunctionBegin;
2653 
2654   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2655 
2656   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2657   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2658   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2659   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2660   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2661   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2662 
2663   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2664   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2665   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2666   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2667   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2668 
2669   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2670   for (i=0;i<m;i++) {
2671     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2672     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2673     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2674   }
2675   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2676   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2677 
2678   for (i=0;i<m;i++) {
2679     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2680     I    = i + rstart;
2681     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2682     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2683   }
2684   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2685   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2686   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2687 
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 #undef __FUNCT__
2692 #define __FUNCT__ "MatFileSplit"
2693 int MatFileSplit(Mat A,char *outfile)
2694 {
2695   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2696   PetscViewer out;
2697   char        *name;
2698   Mat         B;
2699   PetscScalar *values;
2700 
2701   PetscFunctionBegin;
2702 
2703   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2704   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2705   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2706   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2707   for (i=0;i<m;i++) {
2708     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2709     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2710     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2711   }
2712   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2713   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2714 
2715   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2716   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2717   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2718   sprintf(name,"%s.%d",outfile,rank);
2719   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2720   ierr = PetscFree(name);
2721   ierr = MatView(B,out);CHKERRQ(ierr);
2722   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2723   ierr = MatDestroy(B);CHKERRQ(ierr);
2724   PetscFunctionReturn(0);
2725 }
2726