1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 has an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 int CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
18   int        n = aij->B->n,i,ierr;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(int));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
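/*
   Illustrative sketch (not part of the original source): how the colmap built above is used
   to translate a global column number gcol into a local column index of the off-diagonal
   block, mirroring the lookups in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ().  Keys and
   values are stored shifted by one, so a result below zero means the column is not present
   in this processor's off-diagonal part.

     int lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/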
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (int*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(int));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (int*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(int));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar  value;
190   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
191   int          cstart = aij->cstart,cend = aij->cend,row,col;
192   PetscTruth   roworiented = aij->roworiented;
193 
194   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros above */
195   Mat          A = aij->A;
196   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
197   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
198   PetscScalar  *aa = a->a;
199   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
200   Mat          B = aij->B;
201   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
202   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
203   PetscScalar  *ba = b->a;
204 
205   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
206   int          nonew = a->nonew,shift=0;
207   PetscScalar  *ap;
208 
209   PetscFunctionBegin;
210   for (i=0; i<m; i++) {
211     if (im[i] < 0) continue;
212 #if defined(PETSC_USE_BOPT_g)
213     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
214 #endif
215     if (im[i] >= rstart && im[i] < rend) {
216       row = im[i] - rstart;
217       for (j=0; j<n; j++) {
218         if (in[j] >= cstart && in[j] < cend){
219           col = in[j] - cstart;
220           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
221           if (ignorezeroentries && value == 0.0) continue;
222           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
223           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
224         } else if (in[j] < 0) continue;
225 #if defined(PETSC_USE_BOPT_g)
226         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
227 #endif
228         else {
229           if (mat->was_assembled) {
230             if (!aij->colmap) {
231               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
232             }
233 #if defined (PETSC_USE_CTABLE)
234             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
235 	    col--;
236 #else
237             col = aij->colmap[in[j]] - 1;
238 #endif
239             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
240               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
241               col =  in[j];
242               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
243               B = aij->B;
244               b = (Mat_SeqAIJ*)B->data;
245               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
246               ba = b->a;
247             }
248           } else col = in[j];
249           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
250           if (ignorezeroentries && value == 0.0) continue;
251           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
252           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
253         }
254       }
255     } else {
256       if (!aij->donotstash) {
257         if (roworiented) {
258           if (ignorezeroentries && v[i*n] == 0.0) continue;
259           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
260         } else {
261           if (ignorezeroentries && v[i] == 0.0) continue;
262           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
263         }
264       }
265     }
266   }
267   PetscFunctionReturn(0);
268 }
269 
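/*
   Illustrative usage sketch (not part of the original source; error checking abbreviated):
   how MatSetValues() typically drives the routine above from application code.  Rows owned
   by this processor go straight into aij->A or aij->B; rows owned elsewhere are stashed and
   communicated during assembly.  Here row and cols[] are global indices, and row need not be
   owned by this processor.

     int         row     = 0;
     int         cols[3] = {0,1,2};
     PetscScalar vals[3] = {1.0,2.0,3.0};
     ierr = MatSetValues(mat,1,&row,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Mixing ADD_VALUES and INSERT_VALUES across processors without an intervening assembly is
   detected in MatAssemblyBegin_MPIAIJ() below.
*/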
270 #undef __FUNCT__
271 #define __FUNCT__ "MatGetValues_MPIAIJ"
272 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
275   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
276   int        cstart = aij->cstart,cend = aij->cend,row,col;
277 
278   PetscFunctionBegin;
279   for (i=0; i<m; i++) {
280     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
281     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
282     if (idxm[i] >= rstart && idxm[i] < rend) {
283       row = idxm[i] - rstart;
284       for (j=0; j<n; j++) {
285         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
286         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
287         if (idxn[j] >= cstart && idxn[j] < cend){
288           col = idxn[j] - cstart;
289           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
290         } else {
291           if (!aij->colmap) {
292             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
293           }
294 #if defined (PETSC_USE_CTABLE)
295           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
296           col --;
297 #else
298           col = aij->colmap[idxn[j]] - 1;
299 #endif
300           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
301           else {
302             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
303           }
304         }
305       }
306     } else {
307       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
308     }
309   }
310   PetscFunctionReturn(0);
311 }
312 
313 #undef __FUNCT__
314 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
316 {
317   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
318   int         ierr,nstash,reallocs;
319   InsertMode  addv;
320 
321   PetscFunctionBegin;
322   if (aij->donotstash) {
323     PetscFunctionReturn(0);
324   }
325 
326   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
327   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
328   if (addv == (ADD_VALUES|INSERT_VALUES)) {
329     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
330   }
331   mat->insertmode = addv; /* in case this processor had no cache */
332 
333   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
334   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
335   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
336   PetscFunctionReturn(0);
337 }
338 
339 
340 #undef __FUNCT__
341 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
342 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
343 {
344   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
345   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
346   int         i,j,rstart,ncols,n,ierr,flg;
347   int         *row,*col,other_disassembled;
348   PetscScalar *val;
349   InsertMode  addv = mat->insertmode;
350 
351   PetscFunctionBegin;
352   if (!aij->donotstash) {
353     while (1) {
354       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
355       if (!flg) break;
356 
357       for (i=0; i<n;) {
358         /* Now identify the consecutive vals belonging to the same row */
359         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
360         if (j < n) ncols = j-i;
361         else       ncols = n-i;
362         /* Now assemble all these values with a single function call */
363         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
364         i = j;
365       }
366     }
367     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
368   }
369 
370   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
371   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
372 
373   /* determine if any processor has disassembled; if so, we must
374      also disassemble ourselves so that we may reassemble. */
375   /*
376      if the nonzero structure of submatrix B cannot change then we know that
377      no processor disassembled, and thus we can skip this step
378   */
379   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
380     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
381     if (mat->was_assembled && !other_disassembled) {
382       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
383     }
384   }
385 
386   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
387     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
388   }
389   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
390   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
391 
392   if (aij->rowvalues) {
393     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
394     aij->rowvalues = 0;
395   }
396 
397   /* used by MatAXPY() */
398   a->xtoy = 0; b->xtoy = 0;
399   a->XtoY = 0; b->XtoY = 0;
400 
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
422   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
423   int            *nprocs,j,idx,nsends,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431   PetscTruth     found;
432 
433   PetscFunctionBegin;
434   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
435   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
436 
437   /*  first count number of contributors to each processor */
438   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
439   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
440   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
441   for (i=0; i<N; i++) {
442     idx = rows[i];
443     found = PETSC_FALSE;
444     for (j=0; j<size; j++) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
450   }
451   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
452 
453   /* inform other processors of number of messages and max length*/
454   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
455 
456   /* post receives:   */
457   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
458   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
459   for (i=0; i<nrecvs; i++) {
460     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
461   }
462 
463   /* do sends:
464       1) starts[i] gives the starting index in svalues for stuff going to
465          the ith processor
466   */
467   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
468   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
469   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
470   starts[0] = 0;
471   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
472   for (i=0; i<N; i++) {
473     svalues[starts[owner[i]]++] = rows[i];
474   }
475   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
476 
477   starts[0] = 0;
478   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
479   count = 0;
480   for (i=0; i<size; i++) {
481     if (nprocs[2*i+1]) {
482       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
483     }
484   }
485   ierr = PetscFree(starts);CHKERRQ(ierr);
486 
487   base = owners[rank];
488 
489   /*  wait on receives */
490   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
491   source = lens + nrecvs;
492   count  = nrecvs; slen = 0;
493   while (count) {
494     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
495     /* unpack receives into our local space */
496     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
497     source[imdex]  = recv_status.MPI_SOURCE;
498     lens[imdex]    = n;
499     slen          += n;
500     count--;
501   }
502   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
503 
504   /* unpack the received global row indices into local row numbers */
505   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
506   count = 0;
507   for (i=0; i<nrecvs; i++) {
508     values = rvalues + i*nmax;
509     for (j=0; j<lens[i]; j++) {
510       lrows[count++] = values[j] - base;
511     }
512   }
513   ierr = PetscFree(rvalues);CHKERRQ(ierr);
514   ierr = PetscFree(lens);CHKERRQ(ierr);
515   ierr = PetscFree(owner);CHKERRQ(ierr);
516   ierr = PetscFree(nprocs);CHKERRQ(ierr);
517 
518   /* actually zap the local rows */
519   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
520   PetscLogObjectParent(A,istmp);
521 
522   /*
523         Zero the required rows. If the "diagonal block" of the matrix
524      is square and the user wishes to set the diagonal we use separate
525      code so that MatSetValues() is not called for each diagonal entry,
526      which would allocate new memory with many mallocs and slow things down.
527 
528        Contributed by: Mathew Knepley
529   */
530   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
531   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
532   if (diag && (l->A->M == l->A->N)) {
533     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
534   } else if (diag) {
535     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
536     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
537       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
538 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
539     }
540     for (i = 0; i < slen; i++) {
541       row  = lrows[i] + rstart;
542       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
543     }
544     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546   } else {
547     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
548   }
549   ierr = ISDestroy(istmp);CHKERRQ(ierr);
550   ierr = PetscFree(lrows);CHKERRQ(ierr);
551 
552   /* wait on sends */
553   if (nsends) {
554     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
555     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
556     ierr = PetscFree(send_status);CHKERRQ(ierr);
557   }
558   ierr = PetscFree(send_waits);CHKERRQ(ierr);
559   ierr = PetscFree(svalues);CHKERRQ(ierr);
560 
561   PetscFunctionReturn(0);
562 }
563 
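/*
   Illustrative usage sketch (not part of the original source): zeroing a set of global rows,
   optionally placing a value on the diagonal.  Any processor may list any global row; the
   routine above first ships the indices to the owning processors.

     IS          rowsIS;
     int         myrows[2] = {0,5};
     PetscScalar one = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,myrows,&rowsIS);CHKERRQ(ierr);
     ierr = MatZeroRows(A,rowsIS,&one);CHKERRQ(ierr);
     ierr = ISDestroy(rowsIS);CHKERRQ(ierr);
*/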
564 #undef __FUNCT__
565 #define __FUNCT__ "MatMult_MPIAIJ"
566 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
567 {
568   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
569   int        ierr,nt;
570 
571   PetscFunctionBegin;
572   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
573   if (nt != A->n) {
574     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
575   }
576   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
577   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
578   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
579   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582 
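/*
   Editorial note on the structure above (not part of the original source): each processor
   stores its rows as [A | B], where A holds the locally owned columns (the "diagonal block")
   and B the remaining columns.  The multiply therefore computes

     y_local = A * x_local + B * x_ghost

   where x_ghost (a->lvec) is gathered with the VecScatter a->Mvctx.  Starting the scatter,
   doing the local product, and only then finishing the scatter overlaps communication with
   computation.
*/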
583 #undef __FUNCT__
584 #define __FUNCT__ "MatMultAdd_MPIAIJ"
585 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
586 {
587   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
588   int        ierr;
589 
590   PetscFunctionBegin;
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
600 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   /* do nondiagonal part */
607   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
608   /* send it on its way */
609   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
610   /* do local part */
611   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
612   /* receive remote parts: note this assumes the values are not actually */
613   /* inserted in yy until the next line, which is true for this implementation */
614   /* but may not always be true. */
615   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618 
619 EXTERN_C_BEGIN
620 #undef __FUNCT__
621 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
622 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
623 {
624   MPI_Comm comm;
625   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
626   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
627   IS         Me,Notme;
628   int        M,N,first,last,*notme,ntids,i, ierr;
629 
630   PetscFunctionBegin;
631 
632   /* Easy test: the diagonal blocks must be transposes of each other */
633   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
634   ierr = MatIsTranspose(Adia,Bdia,f); CHKERRQ(ierr);
635   if (!*f) PetscFunctionReturn(0);
636   ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr);
637   ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr);
638   if (ntids==1) PetscFunctionReturn(0);
639 
640   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
641   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
642   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
643   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
644   for (i=0; i<first; i++) notme[i] = i;
645   for (i=last; i<M; i++) notme[i-last+first] = i;
646   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
647   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
648   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
649   Aoff = Aoffs[0];
650   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
651   Boff = Boffs[0];
652   ierr = MatIsTranspose(Aoff,Boff,f); CHKERRQ(ierr);
653   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
654   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
655   ierr = ISDestroy(Me); CHKERRQ(ierr);
656   ierr = ISDestroy(Notme); CHKERRQ(ierr);
657 
658   PetscFunctionReturn(0);
659 }
660 EXTERN_C_END
661 
662 #undef __FUNCT__
663 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
664 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
665 {
666   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
667   int        ierr;
668 
669   PetscFunctionBegin;
670   /* do nondiagonal part */
671   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
672   /* send it on its way */
673   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
674   /* do local part */
675   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
676   /* receive remote parts: note this assumes the values are not actually */
677   /* inserted in zz until the next line, which is true for this implementation */
678   /* but may not always be true. */
679   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
680   PetscFunctionReturn(0);
681 }
682 
683 /*
684   This only works correctly for square matrices where the subblock A->A is the
685    diagonal block
686 */
687 #undef __FUNCT__
688 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
689 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
690 {
691   int        ierr;
692   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
693 
694   PetscFunctionBegin;
695   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
696   if (a->rstart != a->cstart || a->rend != a->cend) {
697     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
698   }
699   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
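/*
   Illustrative usage sketch (not part of the original source): extracting the diagonal into
   a vector with the same row layout as the matrix.  VecCreateMPI() is assumed here to be the
   usual parallel vector constructor of this PETSc generation.

     Vec d;
     ierr = VecCreateMPI(A->comm,A->m,A->M,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecDestroy(d);CHKERRQ(ierr);
*/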
703 #undef __FUNCT__
704 #define __FUNCT__ "MatScale_MPIAIJ"
705 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
706 {
707   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
708   int        ierr;
709 
710   PetscFunctionBegin;
711   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
712   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
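/*
   Illustrative usage sketch (not part of the original source): note that in this PETSc
   generation the scaling factor is passed first, by address.

     PetscScalar alpha = 2.0;
     ierr = MatScale(&alpha,A);CHKERRQ(ierr);
*/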
716 #undef __FUNCT__
717 #define __FUNCT__ "MatDestroy_MPIAIJ"
718 int MatDestroy_MPIAIJ(Mat mat)
719 {
720   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
721   int        ierr;
722 
723   PetscFunctionBegin;
724 #if defined(PETSC_USE_LOG)
725   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
726 #endif
727   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
728   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
729   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
730   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
731 #if defined (PETSC_USE_CTABLE)
732   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
733 #else
734   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
735 #endif
736   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
737   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
738   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
739   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
740   ierr = PetscFree(aij);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatView_MPIAIJ_Binary"
746 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
747 {
748   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
749   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
750   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
751   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
752   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
753   PetscScalar       *column_values;
754 
755   PetscFunctionBegin;
756   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
757   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
758   nz   = A->nz + B->nz;
759   if (rank == 0) {
760     header[0] = MAT_FILE_COOKIE;
761     header[1] = mat->M;
762     header[2] = mat->N;
763     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
764     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
765     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
766     /* get largest number of rows any processor has */
767     rlen = mat->m;
768     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
769     for (i=1; i<size; i++) {
770       rlen = PetscMax(rlen,range[i+1] - range[i]);
771     }
772   } else {
773     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
774     rlen = mat->m;
775   }
776 
777   /* load up the local row counts */
778   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
779   for (i=0; i<mat->m; i++) {
780     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
781   }
782 
783   /* store the row lengths to the file */
784   if (rank == 0) {
785     MPI_Status status;
786     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
787     for (i=1; i<size; i++) {
788       rlen = range[i+1] - range[i];
789       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
790       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
791     }
792   } else {
793     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
794   }
795   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
796 
797   /* load up the local column indices */
798   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
799   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
800   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
801   cnt  = 0;
802   for (i=0; i<mat->m; i++) {
803     for (j=B->i[i]; j<B->i[i+1]; j++) {
804       if ( (col = garray[B->j[j]]) > cstart) break;
805       column_indices[cnt++] = col;
806     }
807     for (k=A->i[i]; k<A->i[i+1]; k++) {
808       column_indices[cnt++] = A->j[k] + cstart;
809     }
810     for (; j<B->i[i+1]; j++) {
811       column_indices[cnt++] = garray[B->j[j]];
812     }
813   }
814   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
815 
816   /* store the column indices to the file */
817   if (rank == 0) {
818     MPI_Status status;
819     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
820     for (i=1; i<size; i++) {
821       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
823       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
824       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
825     }
826   } else {
827     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
828     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
829   }
830   ierr = PetscFree(column_indices);CHKERRQ(ierr);
831 
832   /* load up the local column values */
833   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
834   cnt  = 0;
835   for (i=0; i<mat->m; i++) {
836     for (j=B->i[i]; j<B->i[i+1]; j++) {
837       if ( garray[B->j[j]] > cstart) break;
838       column_values[cnt++] = B->a[j];
839     }
840     for (k=A->i[i]; k<A->i[i+1]; k++) {
841       column_values[cnt++] = A->a[k];
842     }
843     for (; j<B->i[i+1]; j++) {
844       column_values[cnt++] = B->a[j];
845     }
846   }
847   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
848 
849   /* store the column values to the file */
850   if (rank == 0) {
851     MPI_Status status;
852     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
853     for (i=1; i<size; i++) {
854       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
856       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
857       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
858     }
859   } else {
860     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
861     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
862   }
863   ierr = PetscFree(column_values);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
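/*
   Editorial note (not part of the original source): this routine is reached through MatView()
   when the viewer is a binary viewer.  A minimal sketch, assuming the binary-viewer creation
   mode flag PETSC_BINARY_CREATE of this PETSc generation:

     PetscViewer bview;
     ierr = PetscViewerBinaryOpen(mat->comm,"matrix.dat",PETSC_BINARY_CREATE,&bview);CHKERRQ(ierr);
     ierr = MatView(mat,bview);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(bview);CHKERRQ(ierr);

   The layout written above is: header (cookie, M, N, total nz), then all row lengths, then
   all column indices, then all values, gathered to processor 0.
*/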
867 #undef __FUNCT__
868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
869 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870 {
871   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
872   int               ierr,rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,isascii,flg,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (isascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo info;
885       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
886       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
887       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
888       if (flg) {
889         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
890 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
891       } else {
892         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
893 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
894       }
895       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
896       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
897       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
899       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
900       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
901       PetscFunctionReturn(0);
902     } else if (format == PETSC_VIEWER_ASCII_INFO) {
903       PetscFunctionReturn(0);
904     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
905       PetscFunctionReturn(0);
906     }
907   } else if (isbinary) {
908     if (size == 1) {
909       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
910       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
911     } else {
912       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
913     }
914     PetscFunctionReturn(0);
915   } else if (isdraw) {
916     PetscDraw  draw;
917     PetscTruth isnull;
918     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
919     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
920   }
921 
922   if (size == 1) {
923     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
924     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
925   } else {
926     /* assemble the entire matrix onto first processor. */
927     Mat         A;
928     Mat_SeqAIJ *Aloc;
929     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
930     PetscScalar *a;
931 
932     if (!rank) {
933       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
934     } else {
935       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
936     }
937     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
938     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
939     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
940     PetscLogObjectParent(mat,A);
941 
942     /* copy over the A part */
943     Aloc = (Mat_SeqAIJ*)aij->A->data;
944     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
945     row = aij->rstart;
946     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
947     for (i=0; i<m; i++) {
948       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
949       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
950     }
951     aj = Aloc->j;
952     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
953 
954     /* copy over the B part */
955     Aloc = (Mat_SeqAIJ*)aij->B->data;
956     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
957     row  = aij->rstart;
958     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
959     ct   = cols;
960     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
961     for (i=0; i<m; i++) {
962       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
963       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
964     }
965     ierr = PetscFree(ct);CHKERRQ(ierr);
966     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
967     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968     /*
969        Everyone has to call to draw the matrix since the graphics waits are
970        synchronized across all processors that share the PetscDraw object
971     */
972     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
973     if (!rank) {
974       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
975       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
976     }
977     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
978     ierr = MatDestroy(A);CHKERRQ(ierr);
979   }
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatView_MPIAIJ"
985 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
986 {
987   int        ierr;
988   PetscTruth isascii,isdraw,issocket,isbinary;
989 
990   PetscFunctionBegin;
991   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
992   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
993   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
994   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
995   if (isascii || isdraw || isbinary || issocket) {
996     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
997   } else {
998     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatRelax_MPIAIJ"
1007 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1008 {
1009   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1010   int          ierr;
1011   Vec          bb1;
1012   PetscScalar  mone=-1.0;
1013 
1014   PetscFunctionBegin;
1015   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1016 
1017   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1018 
1019   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1020     if (flag & SOR_ZERO_INITIAL_GUESS) {
1021       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1022       its--;
1023     }
1024 
1025     while (its--) {
1026       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1027       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1028 
1029       /* update rhs: bb1 = bb - B*x */
1030       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1031       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1032 
1033       /* local sweep */
1034       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1035       CHKERRQ(ierr);
1036     }
1037   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1038     if (flag & SOR_ZERO_INITIAL_GUESS) {
1039       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1040       its--;
1041     }
1042     while (its--) {
1043       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1044       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1045 
1046       /* update rhs: bb1 = bb - B*x */
1047       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1048       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1049 
1050       /* local sweep */
1051       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1052       CHKERRQ(ierr);
1053     }
1054   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1055     if (flag & SOR_ZERO_INITIAL_GUESS) {
1056       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1057       its--;
1058     }
1059     while (its--) {
1060       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1061       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1062 
1063       /* update rhs: bb1 = bb - B*x */
1064       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1065       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1066 
1067       /* local sweep */
1068       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1069       CHKERRQ(ierr);
1070     }
1071   } else {
1072     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1073   }
1074 
1075   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
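/*
   Editorial sketch (not part of the original source): each outer iteration above performs

     bb1 = bb - B * x_ghost     (off-processor coupling moved to the right-hand side)
     x   = SOR(A, bb1, omega)   (purely local sweep on the diagonal block)

   i.e. a block Jacobi outer iteration with local SOR sweeps; a true parallel SOR over the
   whole matrix is not supported (see the error branch above).
*/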
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1081 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1082 {
1083   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1084   Mat        A = mat->A,B = mat->B;
1085   int        ierr;
1086   PetscReal  isend[5],irecv[5];
1087 
1088   PetscFunctionBegin;
1089   info->block_size     = 1.0;
1090   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1091   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1092   isend[3] = info->memory;  isend[4] = info->mallocs;
1093   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1094   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1095   isend[3] += info->memory;  isend[4] += info->mallocs;
1096   if (flag == MAT_LOCAL) {
1097     info->nz_used      = isend[0];
1098     info->nz_allocated = isend[1];
1099     info->nz_unneeded  = isend[2];
1100     info->memory       = isend[3];
1101     info->mallocs      = isend[4];
1102   } else if (flag == MAT_GLOBAL_MAX) {
1103     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1104     info->nz_used      = irecv[0];
1105     info->nz_allocated = irecv[1];
1106     info->nz_unneeded  = irecv[2];
1107     info->memory       = irecv[3];
1108     info->mallocs      = irecv[4];
1109   } else if (flag == MAT_GLOBAL_SUM) {
1110     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1111     info->nz_used      = irecv[0];
1112     info->nz_allocated = irecv[1];
1113     info->nz_unneeded  = irecv[2];
1114     info->memory       = irecv[3];
1115     info->mallocs      = irecv[4];
1116   }
1117   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1118   info->fill_ratio_needed = 0;
1119   info->factor_mallocs    = 0;
1120   info->rows_global       = (double)matin->M;
1121   info->columns_global    = (double)matin->N;
1122   info->rows_local        = (double)matin->m;
1123   info->columns_local     = (double)matin->N;
1124 
1125   PetscFunctionReturn(0);
1126 }
1127 
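/*
   Illustrative usage sketch (not part of the original source): querying assembled-matrix
   statistics summed over all processors.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(mat->comm,"nonzeros used %d allocated %d\n",(int)info.nz_used,(int)info.nz_allocated);CHKERRQ(ierr);
*/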
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatSetOption_MPIAIJ"
1130 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1131 {
1132   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1133   int        ierr;
1134 
1135   PetscFunctionBegin;
1136   switch (op) {
1137   case MAT_NO_NEW_NONZERO_LOCATIONS:
1138   case MAT_YES_NEW_NONZERO_LOCATIONS:
1139   case MAT_COLUMNS_UNSORTED:
1140   case MAT_COLUMNS_SORTED:
1141   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1142   case MAT_KEEP_ZEROED_ROWS:
1143   case MAT_NEW_NONZERO_LOCATION_ERR:
1144   case MAT_USE_INODES:
1145   case MAT_DO_NOT_USE_INODES:
1146   case MAT_IGNORE_ZERO_ENTRIES:
1147     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1148     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1149     break;
1150   case MAT_ROW_ORIENTED:
1151     a->roworiented = PETSC_TRUE;
1152     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1153     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1154     break;
1155   case MAT_ROWS_SORTED:
1156   case MAT_ROWS_UNSORTED:
1157   case MAT_YES_NEW_DIAGONALS:
1158     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1159     break;
1160   case MAT_COLUMN_ORIENTED:
1161     a->roworiented = PETSC_FALSE;
1162     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1163     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1164     break;
1165   case MAT_IGNORE_OFF_PROC_ENTRIES:
1166     a->donotstash = PETSC_TRUE;
1167     break;
1168   case MAT_NO_NEW_DIAGONALS:
1169     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1170   case MAT_SYMMETRIC:
1171   case MAT_STRUCTURALLY_SYMMETRIC:
1172   case MAT_HERMITIAN:
1173   case MAT_SYMMETRY_ETERNAL:
1174     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1175     break;
1176   case MAT_NOT_SYMMETRIC:
1177   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1178   case MAT_NOT_HERMITIAN:
1179   case MAT_NOT_SYMMETRY_ETERNAL:
1180     break;
1181   default:
1182     SETERRQ(PETSC_ERR_SUP,"unknown option");
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatGetRow_MPIAIJ"
1189 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1190 {
1191   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1192   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1193   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1194   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1195   int          *cmap,*idx_p;
1196 
1197   PetscFunctionBegin;
1198   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1199   mat->getrowactive = PETSC_TRUE;
1200 
1201   if (!mat->rowvalues && (idx || v)) {
1202     /*
1203         allocate enough space to hold information from the longest row.
1204     */
1205     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1206     int     max = 1,tmp;
1207     for (i=0; i<matin->m; i++) {
1208       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1209       if (max < tmp) { max = tmp; }
1210     }
1211     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1212     mat->rowindices = (int*)(mat->rowvalues + max);
1213   }
1214 
1215   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1216   lrow = row - rstart;
1217 
1218   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1219   if (!v)   {pvA = 0; pvB = 0;}
1220   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1221   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1222   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1223   nztot = nzA + nzB;
1224 
1225   cmap  = mat->garray;
1226   if (v  || idx) {
1227     if (nztot) {
1228       /* Sort by increasing column numbers, assuming A and B already sorted */
1229       int imark = -1;
1230       if (v) {
1231         *v = v_p = mat->rowvalues;
1232         for (i=0; i<nzB; i++) {
1233           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1234           else break;
1235         }
1236         imark = i;
1237         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1238         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1239       }
1240       if (idx) {
1241         *idx = idx_p = mat->rowindices;
1242         if (imark > -1) {
1243           for (i=0; i<imark; i++) {
1244             idx_p[i] = cmap[cworkB[i]];
1245           }
1246         } else {
1247           for (i=0; i<nzB; i++) {
1248             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1249             else break;
1250           }
1251           imark = i;
1252         }
1253         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1254         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1255       }
1256     } else {
1257       if (idx) *idx = 0;
1258       if (v)   *v   = 0;
1259     }
1260   }
1261   *nz = nztot;
1262   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1263   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1269 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1270 {
1271   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1272 
1273   PetscFunctionBegin;
1274   if (aij->getrowactive == PETSC_FALSE) {
1275     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1276   }
1277   aij->getrowactive = PETSC_FALSE;
1278   PetscFunctionReturn(0);
1279 }
1280 
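/*
   Illustrative usage sketch (not part of the original source): scanning the locally owned
   rows with MatGetRow()/MatRestoreRow().  Only local rows may be requested, and only one row
   may be active at a time (enforced by getrowactive above).

     int          row,rstart,rend,ncols,*cols;
     PetscScalar  *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/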
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatNorm_MPIAIJ"
1283 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1284 {
1285   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1286   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1287   int          ierr,i,j,cstart = aij->cstart;
1288   PetscReal    sum = 0.0;
1289   PetscScalar  *v;
1290 
1291   PetscFunctionBegin;
1292   if (aij->size == 1) {
1293     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1294   } else {
1295     if (type == NORM_FROBENIUS) {
1296       v = amat->a;
1297       for (i=0; i<amat->nz; i++) {
1298 #if defined(PETSC_USE_COMPLEX)
1299         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1300 #else
1301         sum += (*v)*(*v); v++;
1302 #endif
1303       }
1304       v = bmat->a;
1305       for (i=0; i<bmat->nz; i++) {
1306 #if defined(PETSC_USE_COMPLEX)
1307         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1308 #else
1309         sum += (*v)*(*v); v++;
1310 #endif
1311       }
1312       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1313       *norm = sqrt(*norm);
1314     } else if (type == NORM_1) { /* max column norm */
1315       PetscReal *tmp,*tmp2;
1316       int    *jj,*garray = aij->garray;
1317       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1318       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1319       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1320       *norm = 0.0;
1321       v = amat->a; jj = amat->j;
1322       for (j=0; j<amat->nz; j++) {
1323         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1324       }
1325       v = bmat->a; jj = bmat->j;
1326       for (j=0; j<bmat->nz; j++) {
1327         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1328       }
1329       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1330       for (j=0; j<mat->N; j++) {
1331         if (tmp2[j] > *norm) *norm = tmp2[j];
1332       }
1333       ierr = PetscFree(tmp);CHKERRQ(ierr);
1334       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1335     } else if (type == NORM_INFINITY) { /* max row norm */
1336       PetscReal ntemp = 0.0;
1337       for (j=0; j<aij->A->m; j++) {
1338         v = amat->a + amat->i[j];
1339         sum = 0.0;
1340         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1341           sum += PetscAbsScalar(*v); v++;
1342         }
1343         v = bmat->a + bmat->i[j];
1344         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1345           sum += PetscAbsScalar(*v); v++;
1346         }
1347         if (sum > ntemp) ntemp = sum;
1348       }
1349       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1350     } else {
1351       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1352     }
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
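/*
   Illustrative usage sketch (not part of the original source): the three norms supported
   above (Frobenius, 1-norm = maximum column sum, infinity-norm = maximum row sum).

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/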
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatTranspose_MPIAIJ"
1359 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1360 {
1361   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1362   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1363   int          ierr;
1364   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1365   Mat          B;
1366   PetscScalar  *array;
1367 
1368   PetscFunctionBegin;
1369   if (!matout && M != N) {
1370     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1371   }
1372 
1373   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1374   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1375   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1376 
1377   /* copy over the A part */
1378   Aloc = (Mat_SeqAIJ*)a->A->data;
1379   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1380   row = a->rstart;
1381   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1382   for (i=0; i<m; i++) {
1383     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1384     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1385   }
1386   aj = Aloc->j;
1387   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1388 
1389   /* copy over the B part */
1390   Aloc = (Mat_SeqAIJ*)a->B->data;
1391   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1392   row  = a->rstart;
1393   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1394   ct   = cols;
1395   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1396   for (i=0; i<m; i++) {
1397     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1398     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1399   }
1400   ierr = PetscFree(ct);CHKERRQ(ierr);
1401   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1402   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1403   if (matout) {
1404     *matout = B;
1405   } else {
1406     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1413 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1414 {
1415   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1416   Mat        a = aij->A,b = aij->B;
1417   int        ierr,s1,s2,s3;
1418 
1419   PetscFunctionBegin;
1420   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1421   if (rr) {
1422     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1423     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1424     /* Overlap communication with computation. */
1425     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1426   }
1427   if (ll) {
1428     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1429     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1430     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1431   }
1432   /* scale  the diagonal block */
1433   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1434 
1435   if (rr) {
1436     /* Do a scatter end and then right scale the off-diagonal block */
1437     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1438     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1439   }
1440 
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1447 int MatPrintHelp_MPIAIJ(Mat A)
1448 {
1449   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1450   int        ierr;
1451 
1452   PetscFunctionBegin;
1453   if (!a->rank) {
1454     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1461 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1462 {
1463   PetscFunctionBegin;
1464   *bs = 1;
1465   PetscFunctionReturn(0);
1466 }
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1469 int MatSetUnfactored_MPIAIJ(Mat A)
1470 {
1471   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1472   int        ierr;
1473 
1474   PetscFunctionBegin;
1475   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatEqual_MPIAIJ"
1481 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1482 {
1483   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1484   Mat        a,b,c,d;
1485   PetscTruth flg;
1486   int        ierr;
1487 
1488   PetscFunctionBegin;
1489   a = matA->A; b = matA->B;
1490   c = matB->A; d = matB->B;
1491 
1492   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1493   if (flg == PETSC_TRUE) {
1494     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1495   }
1496   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatCopy_MPIAIJ"
1502 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1503 {
1504   int        ierr;
1505   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1506   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1507 
1508   PetscFunctionBegin;
1509   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1510   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1511     /* Because of the column compression in the off-processor part of the matrix a->B,
1512        the number of columns in a->B and b->B may differ, so we cannot call
1513        MatCopy() directly on the two parts. If need be, we could provide a more
1514        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1515        and then copying the submatrices */
1516     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1517   } else {
1518     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1519     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1526 int MatSetUpPreallocation_MPIAIJ(Mat A)
1527 {
1528   int        ierr;
1529 
1530   PetscFunctionBegin;
1531   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #include "petscblaslapack.h"
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatAXPY_MPIAIJ"
1538 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1539 {
1540   int        ierr,one=1,i;
1541   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1542   Mat_SeqAIJ *x,*y;
1543 
1544   PetscFunctionBegin;
1545   if (str == SAME_NONZERO_PATTERN) {
1546     x = (Mat_SeqAIJ *)xx->A->data;
1547     y = (Mat_SeqAIJ *)yy->A->data;
1548     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1549     x = (Mat_SeqAIJ *)xx->B->data;
1550     y = (Mat_SeqAIJ *)yy->B->data;
1551     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1552   } else if (str == SUBSET_NONZERO_PATTERN) {
1553     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1554 
1555     x = (Mat_SeqAIJ *)xx->B->data;
1556     y = (Mat_SeqAIJ *)yy->B->data;
1557     if (y->xtoy && y->XtoY != xx->B) {
1558       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1559       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1560     }
1561     if (!y->xtoy) { /* get xtoy */
1562       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1563       y->XtoY = xx->B;
1564     }
1565     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1566   } else {
1567     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1568   }
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 /* -------------------------------------------------------------------*/
1573 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1574        MatGetRow_MPIAIJ,
1575        MatRestoreRow_MPIAIJ,
1576        MatMult_MPIAIJ,
1577 /* 4*/ MatMultAdd_MPIAIJ,
1578        MatMultTranspose_MPIAIJ,
1579        MatMultTransposeAdd_MPIAIJ,
1580        0,
1581        0,
1582        0,
1583 /*10*/ 0,
1584        0,
1585        0,
1586        MatRelax_MPIAIJ,
1587        MatTranspose_MPIAIJ,
1588 /*15*/ MatGetInfo_MPIAIJ,
1589        MatEqual_MPIAIJ,
1590        MatGetDiagonal_MPIAIJ,
1591        MatDiagonalScale_MPIAIJ,
1592        MatNorm_MPIAIJ,
1593 /*20*/ MatAssemblyBegin_MPIAIJ,
1594        MatAssemblyEnd_MPIAIJ,
1595        0,
1596        MatSetOption_MPIAIJ,
1597        MatZeroEntries_MPIAIJ,
1598 /*25*/ MatZeroRows_MPIAIJ,
1599 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1600        MatLUFactorSymbolic_MPIAIJ_TFS,
1601 #else
1602        0,
1603 #endif
1604        0,
1605        0,
1606        0,
1607 /*30*/ MatSetUpPreallocation_MPIAIJ,
1608        0,
1609        0,
1610        0,
1611        0,
1612 /*35*/ MatDuplicate_MPIAIJ,
1613        0,
1614        0,
1615        0,
1616        0,
1617 /*40*/ MatAXPY_MPIAIJ,
1618        MatGetSubMatrices_MPIAIJ,
1619        MatIncreaseOverlap_MPIAIJ,
1620        MatGetValues_MPIAIJ,
1621        MatCopy_MPIAIJ,
1622 /*45*/ MatPrintHelp_MPIAIJ,
1623        MatScale_MPIAIJ,
1624        0,
1625        0,
1626        0,
1627 /*50*/ MatGetBlockSize_MPIAIJ,
1628        0,
1629        0,
1630        0,
1631        0,
1632 /*55*/ MatFDColoringCreate_MPIAIJ,
1633        0,
1634        MatSetUnfactored_MPIAIJ,
1635        0,
1636        0,
1637 /*60*/ MatGetSubMatrix_MPIAIJ,
1638        MatDestroy_MPIAIJ,
1639        MatView_MPIAIJ,
1640        MatGetPetscMaps_Petsc,
1641        0,
1642 /*65*/ 0,
1643        0,
1644        0,
1645        0,
1646        0,
1647 /*70*/ 0,
1648        0,
1649        MatSetColoring_MPIAIJ,
1650        MatSetValuesAdic_MPIAIJ,
1651        MatSetValuesAdifor_MPIAIJ,
1652 /*75*/ 0,
1653        0,
1654        0,
1655        0,
1656        0,
1657 /*80*/ 0,
1658        0,
1659        0,
1660        0,
1661 /*85*/ MatLoad_MPIAIJ};
1662 
1663 /* ----------------------------------------------------------------------------------------*/
1664 
1665 EXTERN_C_BEGIN
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1668 int MatStoreValues_MPIAIJ(Mat mat)
1669 {
1670   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1671   int        ierr;
1672 
1673   PetscFunctionBegin;
1674   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1675   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1676   PetscFunctionReturn(0);
1677 }
1678 EXTERN_C_END
1679 
1680 EXTERN_C_BEGIN
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1683 int MatRetrieveValues_MPIAIJ(Mat mat)
1684 {
1685   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1686   int        ierr;
1687 
1688   PetscFunctionBegin;
1689   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1690   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1691   PetscFunctionReturn(0);
1692 }
1693 EXTERN_C_END
1694 
1695 #include "petscpc.h"
1696 EXTERN_C_BEGIN
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1699 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1700 {
1701   Mat_MPIAIJ   *b;
1702   int          ierr,i;
1703 
1704   PetscFunctionBegin;
1705   B->preallocated = PETSC_TRUE;
1706   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1707   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1708   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1709   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1710   if (d_nnz) {
1711     for (i=0; i<B->m; i++) {
1712       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1713     }
1714   }
1715   if (o_nnz) {
1716     for (i=0; i<B->m; i++) {
1717       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1718     }
1719   }
1720   b = (Mat_MPIAIJ*)B->data;
1721   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1722   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1723 
1724   PetscFunctionReturn(0);
1725 }
1726 EXTERN_C_END
1727 
1728 /*MC
1729    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1730 
1731    Options Database Keys:
1732 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1733 
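   A minimal calling sketch for selecting this type at run time is given below
   (this assumes the usual MatCreate()/MatSetFromOptions() sequence; the variable
   names are illustrative only):
.vb
   Mat A;
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);     /* then run with -mat_type mpiaij */
   ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve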
1734   Level: beginner
1735 
1736 .seealso: MatCreateMPIAIJ
1737 M*/
1738 
1739 EXTERN_C_BEGIN
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatCreate_MPIAIJ"
1742 int MatCreate_MPIAIJ(Mat B)
1743 {
1744   Mat_MPIAIJ *b;
1745   int        ierr,i,size;
1746 
1747   PetscFunctionBegin;
1748   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1749 
1750   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1751   B->data         = (void*)b;
1752   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1753   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1754   B->factor       = 0;
1755   B->assembled    = PETSC_FALSE;
1756   B->mapping      = 0;
1757 
1758   B->insertmode      = NOT_SET_VALUES;
1759   b->size            = size;
1760   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1761 
1762   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1763   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1764 
1765   /* The information in the maps duplicates the information computed below; eventually
1766      we should remove the duplicate information that is not contained in the maps */
1767   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1768   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1769 
1770   /* build local table of row and column ownerships */
1771   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1772   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1773   b->cowners = b->rowners + b->size + 2;
1774   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1775   b->rowners[0] = 0;
1776   for (i=2; i<=b->size; i++) {
1777     b->rowners[i] += b->rowners[i-1];
1778   }
1779   b->rstart = b->rowners[b->rank];
1780   b->rend   = b->rowners[b->rank+1];
1781   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1782   b->cowners[0] = 0;
1783   for (i=2; i<=b->size; i++) {
1784     b->cowners[i] += b->cowners[i-1];
1785   }
1786   b->cstart = b->cowners[b->rank];
1787   b->cend   = b->cowners[b->rank+1];
1788 
1789   /* build cache for off array entries formed */
1790   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1791   b->donotstash  = PETSC_FALSE;
1792   b->colmap      = 0;
1793   b->garray      = 0;
1794   b->roworiented = PETSC_TRUE;
1795 
1796   /* stuff used for matrix vector multiply */
1797   b->lvec      = PETSC_NULL;
1798   b->Mvctx     = PETSC_NULL;
1799 
1800   /* stuff for MatGetRow() */
1801   b->rowindices   = 0;
1802   b->rowvalues    = 0;
1803   b->getrowactive = PETSC_FALSE;
1804 
1805   /* Explicitly create 2 MATSEQAIJ matrices. */
1806   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1807   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1808   PetscLogObjectParent(B,b->A);
1809   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1810   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1811   PetscLogObjectParent(B,b->B);
1812 
1813   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1814                                      "MatStoreValues_MPIAIJ",
1815                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1816   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1817                                      "MatRetrieveValues_MPIAIJ",
1818                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1819   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1820 				     "MatGetDiagonalBlock_MPIAIJ",
1821                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1822   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1823 				     "MatIsTranspose_MPIAIJ",
1824 				     MatIsTranspose_MPIAIJ); CHKERRQ(ierr);
1825   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1826 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1827 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1829 				     "MatDiagonalScaleLocal_MPIAIJ",
1830 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 EXTERN_C_END
1834 
1835 /*MC
1836    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1837 
1838    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1839    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1840    MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1841    for communicators controlling multiple processes.  It is recommended that you call both of
1842    the above preallocation routines for simplicity.
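   For example, the following sketch works for any communicator size (here nz is
   only an illustrative estimate of the nonzeros per row):
.vb
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
   ierr = MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,nz,PETSC_NULL,nz,PETSC_NULL);CHKERRQ(ierr);
.ve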
1843 
1844    Options Database Keys:
1845 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1846 
1847   Level: beginner
1848 
1849 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1850 M*/
1851 
1852 EXTERN_C_BEGIN
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatCreate_AIJ"
1855 int MatCreate_AIJ(Mat A) {
1856   int ierr,size;
1857 
1858   PetscFunctionBegin;
1859   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1860   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1861   if (size == 1) {
1862     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1863   } else {
1864     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1865   }
1866   PetscFunctionReturn(0);
1867 }
1868 EXTERN_C_END
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1872 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1873 {
1874   Mat        mat;
1875   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1876   int        ierr;
1877 
1878   PetscFunctionBegin;
1879   *newmat       = 0;
1880   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1881   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1882   a    = (Mat_MPIAIJ*)mat->data;
1883 
1884   mat->factor       = matin->factor;
1885   mat->assembled    = PETSC_TRUE;
1886   mat->insertmode   = NOT_SET_VALUES;
1887   mat->preallocated = PETSC_TRUE;
1888 
1889   a->rstart       = oldmat->rstart;
1890   a->rend         = oldmat->rend;
1891   a->cstart       = oldmat->cstart;
1892   a->cend         = oldmat->cend;
1893   a->size         = oldmat->size;
1894   a->rank         = oldmat->rank;
1895   a->donotstash   = oldmat->donotstash;
1896   a->roworiented  = oldmat->roworiented;
1897   a->rowindices   = 0;
1898   a->rowvalues    = 0;
1899   a->getrowactive = PETSC_FALSE;
1900 
1901   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1902   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1903   if (oldmat->colmap) {
1904 #if defined (PETSC_USE_CTABLE)
1905     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1906 #else
1907     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1908     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1909     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1910 #endif
1911   } else a->colmap = 0;
1912   if (oldmat->garray) {
1913     int len;
1914     len  = oldmat->B->n;
1915     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1916     PetscLogObjectMemory(mat,len*sizeof(int));
1917     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1918   } else a->garray = 0;
1919 
1920   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1921   PetscLogObjectParent(mat,a->lvec);
1922   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1923   PetscLogObjectParent(mat,a->Mvctx);
1924   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1925   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1926   PetscLogObjectParent(mat,a->A);
1927   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1928   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1929   PetscLogObjectParent(mat,a->B);
1930   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1931   *newmat = mat;
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #include "petscsys.h"
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatLoad_MPIAIJ"
1939 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1940 {
1941   Mat          A;
1942   PetscScalar  *vals,*svals;
1943   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1944   MPI_Status   status;
1945   int          i,nz,ierr,j,rstart,rend,fd;
1946   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1947   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1948   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1949 
1950   PetscFunctionBegin;
1951   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1952   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1953   if (!rank) {
1954     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1955     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1956     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1957     if (header[3] < 0) {
1958       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1959     }
1960   }
1961 
1962   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1963   M = header[1]; N = header[2];
1964   /* determine ownership of all rows */
1965   m = M/size + ((M % size) > rank);
1966   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1967   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1968   rowners[0] = 0;
1969   for (i=2; i<=size; i++) {
1970     rowners[i] += rowners[i-1];
1971   }
1972   rstart = rowners[rank];
1973   rend   = rowners[rank+1];
1974 
1975   /* distribute row lengths to all processors */
1976   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1977   offlens = ourlens + (rend-rstart);
1978   if (!rank) {
1979     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1980     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1981     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1982     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1983     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1984     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1985   } else {
1986     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1987   }
1988 
1989   if (!rank) {
1990     /* calculate the number of nonzeros on each processor */
1991     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1992     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1993     for (i=0; i<size; i++) {
1994       for (j=rowners[i]; j< rowners[i+1]; j++) {
1995         procsnz[i] += rowlengths[j];
1996       }
1997     }
1998     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1999 
2000     /* determine max buffer needed and allocate it */
2001     maxnz = 0;
2002     for (i=0; i<size; i++) {
2003       maxnz = PetscMax(maxnz,procsnz[i]);
2004     }
2005     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2006 
2007     /* read in my part of the matrix column indices  */
2008     nz   = procsnz[0];
2009     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2010     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2011 
2012     /* read in everyone else's part and ship it off */
2013     for (i=1; i<size; i++) {
2014       nz   = procsnz[i];
2015       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2016       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2017     }
2018     ierr = PetscFree(cols);CHKERRQ(ierr);
2019   } else {
2020     /* determine buffer space needed for message */
2021     nz = 0;
2022     for (i=0; i<m; i++) {
2023       nz += ourlens[i];
2024     }
2025     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2026 
2027     /* receive message of column indices*/
2028     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2029     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2030     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2031   }
2032 
2033   /* determine column ownership if matrix is not square */
2034   if (N != M) {
2035     n      = N/size + ((N % size) > rank);
2036     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2037     cstart = cend - n;
2038   } else {
2039     cstart = rstart;
2040     cend   = rend;
2041     n      = cend - cstart;
2042   }
2043 
2044   /* loop over local rows, determining number of off diagonal entries */
2045   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2046   jj = 0;
2047   for (i=0; i<m; i++) {
2048     for (j=0; j<ourlens[i]; j++) {
2049       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2050       jj++;
2051     }
2052   }
2053 
2054   /* create our matrix */
2055   for (i=0; i<m; i++) {
2056     ourlens[i] -= offlens[i];
2057   }
2058   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2059   ierr = MatSetType(A,type);CHKERRQ(ierr);
2060   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2061 
2062   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2063   for (i=0; i<m; i++) {
2064     ourlens[i] += offlens[i];
2065   }
2066 
2067   if (!rank) {
2068     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2069 
2070     /* read in my part of the matrix numerical values  */
2071     nz   = procsnz[0];
2072     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2073 
2074     /* insert into matrix */
2075     jj      = rstart;
2076     smycols = mycols;
2077     svals   = vals;
2078     for (i=0; i<m; i++) {
2079       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2080       smycols += ourlens[i];
2081       svals   += ourlens[i];
2082       jj++;
2083     }
2084 
2085     /* read in other processors and ship out */
2086     for (i=1; i<size; i++) {
2087       nz   = procsnz[i];
2088       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2089       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2090     }
2091     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2092   } else {
2093     /* receive numeric values */
2094     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2095 
2096     /* receive message of values*/
2097     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2098     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2099     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2100 
2101     /* insert into matrix */
2102     jj      = rstart;
2103     smycols = mycols;
2104     svals   = vals;
2105     for (i=0; i<m; i++) {
2106       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2107       smycols += ourlens[i];
2108       svals   += ourlens[i];
2109       jj++;
2110     }
2111   }
2112   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2113   ierr = PetscFree(vals);CHKERRQ(ierr);
2114   ierr = PetscFree(mycols);CHKERRQ(ierr);
2115   ierr = PetscFree(rowners);CHKERRQ(ierr);
2116 
2117   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2118   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2119   *newmat = A;
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2125 /*
2126     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2127   on each process, and then the final result formed by concatenating those local matrices.
2128   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2129 */
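/*
   A sketch of the intended calling sequence for this routine (isrow and iscol are
   assumed to be index sets selecting the wanted rows and columns on this process):

      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ...
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call requires the same size/layout as the original request and
   reuses the sequential scratch matrix that is composed on the result as "SubMatrix".
*/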
2130 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2131 {
2132   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2133   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2134   Mat          *local,M,Mreuse;
2135   PetscScalar  *vwork,*aa;
2136   MPI_Comm     comm = mat->comm;
2137   Mat_SeqAIJ   *aij;
2138 
2139 
2140   PetscFunctionBegin;
2141   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2142   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2143 
2144   if (call ==  MAT_REUSE_MATRIX) {
2145     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2146     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2147     local = &Mreuse;
2148     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2149   } else {
2150     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2151     Mreuse = *local;
2152     ierr   = PetscFree(local);CHKERRQ(ierr);
2153   }
2154 
2155   /*
2156       m - number of local rows
2157       n - number of columns (same on all processors)
2158       rstart - first row in new global matrix generated
2159   */
2160   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2161   if (call == MAT_INITIAL_MATRIX) {
2162     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2163     ii  = aij->i;
2164     jj  = aij->j;
2165 
2166     /*
2167         Determine the number of non-zeros in the diagonal and off-diagonal
2168         portions of the matrix in order to do correct preallocation
2169     */
2170 
2171     /* first get start and end of "diagonal" columns */
2172     if (csize == PETSC_DECIDE) {
2173       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2174       if (mglobal == n) { /* square matrix */
2175 	nlocal = m;
2176       } else {
2177         nlocal = n/size + ((n % size) > rank);
2178       }
2179     } else {
2180       nlocal = csize;
2181     }
2182     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2183     rstart = rend - nlocal;
2184     if (rank == size - 1 && rend != n) {
2185       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2186     }
2187 
2188     /* next, compute all the lengths */
2189     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2190     olens = dlens + m;
2191     for (i=0; i<m; i++) {
2192       jend = ii[i+1] - ii[i];
2193       olen = 0;
2194       dlen = 0;
2195       for (j=0; j<jend; j++) {
2196         if (*jj < rstart || *jj >= rend) olen++;
2197         else dlen++;
2198         jj++;
2199       }
2200       olens[i] = olen;
2201       dlens[i] = dlen;
2202     }
2203     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2204     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2205     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2206     ierr = PetscFree(dlens);CHKERRQ(ierr);
2207   } else {
2208     int ml,nl;
2209 
2210     M = *newmat;
2211     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2212     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2213     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2214     /*
2215          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2216        rather than the slower MatSetValues().
2217     */
2218     M->was_assembled = PETSC_TRUE;
2219     M->assembled     = PETSC_FALSE;
2220   }
2221   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2222   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2223   ii  = aij->i;
2224   jj  = aij->j;
2225   aa  = aij->a;
2226   for (i=0; i<m; i++) {
2227     row   = rstart + i;
2228     nz    = ii[i+1] - ii[i];
2229     cwork = jj;     jj += nz;
2230     vwork = aa;     aa += nz;
2231     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2232   }
2233 
2234   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2235   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2236   *newmat = M;
2237 
2238   /* save submatrix used in processor for next request */
2239   if (call ==  MAT_INITIAL_MATRIX) {
2240     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2241     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2242   }
2243 
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 #undef __FUNCT__
2248 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2249 /*@C
2250    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2251    (the default parallel PETSc format).  For good matrix assembly performance
2252    the user should preallocate the matrix storage by setting the parameters
2253    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2254    performance can be increased by more than a factor of 50.
2255 
2256    Collective on MPI_Comm
2257 
2258    Input Parameters:
2259 +  A - the matrix
2260 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2261            (same value is used for all local rows)
2262 .  d_nnz - array containing the number of nonzeros in the various rows of the
2263            DIAGONAL portion of the local submatrix (possibly different for each row)
2264            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2265            The size of this array is equal to the number of local rows, i.e 'm'.
2266            You must leave room for the diagonal entry even if it is zero.
2267 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2268            submatrix (same value is used for all local rows).
2269 -  o_nnz - array containing the number of nonzeros in the various rows of the
2270            OFF-DIAGONAL portion of the local submatrix (possibly different for
2271            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2272            structure. The size of this array is equal to the number
2273            of local rows, i.e 'm'.
2274 
2275    The AIJ format (also called the Yale sparse matrix format or
2276    compressed row storage), is fully compatible with standard Fortran 77
2277    storage.  That is, the stored row and column indices can begin at
2278    either one (as in Fortran) or zero.  See the users manual for details.
2279 
2280    The user MUST specify either the local or global matrix dimensions
2281    (possibly both).
2282 
2283    The parallel matrix is partitioned such that the first m0 rows belong to
2284    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2285    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2286 
2287    The DIAGONAL portion of the local submatrix of a processor can be defined
2288    as the submatrix which is obtained by extracting the part corresponding
2289    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2290    first row that belongs to the processor, and r2 is the last row belonging
2291    to this processor. This is a square mxm matrix. The remaining portion
2292    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2293 
2294    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2295 
2296    By default, this format uses inodes (identical nodes) when possible.
2297    We search for consecutive rows with the same nonzero structure, thereby
2298    reusing matrix information to achieve increased efficiency.
2299 
2300    Options Database Keys:
2301 +  -mat_aij_no_inode  - Do not use inodes
2302 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2303 -  -mat_aij_oneindex - Internally use indexing starting at 1
2304         rather than 0.  Note that when calling MatSetValues(),
2305         the user still MUST index entries starting at 0!
2306 
2307    Example usage:
2308 
2309    Consider the following 8x8 matrix with 34 non-zero values, that is
2310    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2311    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2312    as follows:
2313 
2314 .vb
2315             1  2  0  |  0  3  0  |  0  4
2316     Proc0   0  5  6  |  7  0  0  |  8  0
2317             9  0 10  | 11  0  0  | 12  0
2318     -------------------------------------
2319            13  0 14  | 15 16 17  |  0  0
2320     Proc1   0 18  0  | 19 20 21  |  0  0
2321             0  0  0  | 22 23  0  | 24  0
2322     -------------------------------------
2323     Proc2  25 26 27  |  0  0 28  | 29  0
2324            30  0  0  | 31 32 33  |  0 34
2325 .ve
2326 
2327    This can be represented as a collection of submatrices as:
2328 
2329 .vb
2330       A B C
2331       D E F
2332       G H I
2333 .ve
2334 
2335    Where the submatrices A,B,C are owned by proc0, D,E,F are
2336    owned by proc1, G,H,I are owned by proc2.
2337 
2338    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2339    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2340    The 'M','N' parameters are 8,8, and have the same values on all procs.
2341 
2342    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2343    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2344    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2345    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2346    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2347    matrix, and [DF] as another SeqAIJ matrix.
2348 
2349    When d_nz, o_nz parameters are specified, d_nz storage elements are
2350    allocated for every row of the local diagonal submatrix, and o_nz
2351    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2352    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2353    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2354    In this case, the values of d_nz,o_nz are:
2355 .vb
2356      proc0 : d_nz = 2, o_nz = 2
2357      proc1 : d_nz = 3, o_nz = 2
2358      proc2 : d_nz = 1, o_nz = 4
2359 .ve
2360    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2361    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2362    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2363    34 values.
2364 
2365    When d_nnz, o_nnz parameters are specified, the storage is specified
2366    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2367    In the above case the values for d_nnz,o_nnz are:
2368 .vb
2369      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2370      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2371      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2372 .ve
2373    Here the space allocated is the sum of all the above values, i.e. 34, and
2374    hence the preallocation is perfect.
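   A typical calling sequence is sketched below (the communicator and the sizes
   m,n,M,N are assumed to have been chosen already; d_nnz and o_nnz are the
   per-row counts described above):
.vb
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve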
2375 
2376    Level: intermediate
2377 
2378 .keywords: matrix, aij, compressed row, sparse, parallel
2379 
2380 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2381 @*/
2382 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2383 {
2384   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2385 
2386   PetscFunctionBegin;
2387   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2388   if (f) {
2389     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2390   }
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatCreateMPIAIJ"
2396 /*@C
2397    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2398    (the default parallel PETSc format).  For good matrix assembly performance
2399    the user should preallocate the matrix storage by setting the parameters
2400    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2401    performance can be increased by more than a factor of 50.
2402 
2403    Collective on MPI_Comm
2404 
2405    Input Parameters:
2406 +  comm - MPI communicator
2407 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2408            This value should be the same as the local size used in creating the
2409            y vector for the matrix-vector product y = Ax.
2410 .  n - This value should be the same as the local size used in creating the
2411        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2412        calculated if N is given) For square matrices n is almost always m.
2413 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2414 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2415 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2416            (same value is used for all local rows)
2417 .  d_nnz - array containing the number of nonzeros in the various rows of the
2418            DIAGONAL portion of the local submatrix (possibly different for each row)
2419            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2420            The size of this array is equal to the number of local rows, i.e 'm'.
2421            You must leave room for the diagonal entry even if it is zero.
2422 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2423            submatrix (same value is used for all local rows).
2424 -  o_nnz - array containing the number of nonzeros in the various rows of the
2425            OFF-DIAGONAL portion of the local submatrix (possibly different for
2426            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2427            structure. The size of this array is equal to the number
2428            of local rows, i.e 'm'.
2429 
2430    Output Parameter:
2431 .  A - the matrix
2432 
2433    Notes:
2434    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2435    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2436    storage requirements for this matrix.
2437 
2438    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2439    processor then it must be used on all processors that share the object for
2440    that argument.
2441 
2442    The AIJ format (also called the Yale sparse matrix format or
2443    compressed row storage), is fully compatible with standard Fortran 77
2444    storage.  That is, the stored row and column indices can begin at
2445    either one (as in Fortran) or zero.  See the users manual for details.
2446 
2447    The user MUST specify either the local or global matrix dimensions
2448    (possibly both).
2449 
2450    The parallel matrix is partitioned such that the first m0 rows belong to
2451    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2452    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2453 
2454    The DIAGONAL portion of the local submatrix of a processor can be defined
2455    as the submatrix which is obtained by extracting the part corresponding
2456    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2457    first row that belongs to the processor, and r2 is the last row belonging
2458    to this processor. This is a square mxm matrix. The remaining portion
2459    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2460 
2461    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2462 
2463    When calling this routine with a single process communicator, a matrix of
2464    type MATSEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
2465    type of communicator, use the construction mechanism:
2466      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2467 
2468    By default, this format uses inodes (identical nodes) when possible.
2469    We search for consecutive rows with the same nonzero structure, thereby
2470    reusing matrix information to achieve increased efficiency.
2471 
2472    Options Database Keys:
2473 +  -mat_aij_no_inode  - Do not use inodes
2474 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2475 -  -mat_aij_oneindex - Internally use indexing starting at 1
2476         rather than 0.  Note that when calling MatSetValues(),
2477         the user still MUST index entries starting at 0!
2478 
2479 
2480    Example usage:
2481 
2482    Consider the following 8x8 matrix with 34 non-zero values, that is
2483    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2484    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2485    as follows:
2486 
2487 .vb
2488             1  2  0  |  0  3  0  |  0  4
2489     Proc0   0  5  6  |  7  0  0  |  8  0
2490             9  0 10  | 11  0  0  | 12  0
2491     -------------------------------------
2492            13  0 14  | 15 16 17  |  0  0
2493     Proc1   0 18  0  | 19 20 21  |  0  0
2494             0  0  0  | 22 23  0  | 24  0
2495     -------------------------------------
2496     Proc2  25 26 27  |  0  0 28  | 29  0
2497            30  0  0  | 31 32 33  |  0 34
2498 .ve
2499 
2500    This can be represented as a collection of submatrices as:
2501 
2502 .vb
2503       A B C
2504       D E F
2505       G H I
2506 .ve
2507 
2508    Where the submatrices A,B,C are owned by proc0, D,E,F are
2509    owned by proc1, G,H,I are owned by proc2.
2510 
2511    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2512    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2513    The 'M','N' parameters are 8,8, and have the same values on all procs.
2514 
2515    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2516    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2517    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2518    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2519    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2520    matrix, and [DF] as another SeqAIJ matrix.
2521 
2522    When d_nz, o_nz parameters are specified, d_nz storage elements are
2523    allocated for every row of the local diagonal submatrix, and o_nz
2524    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2525    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2526    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2527    In this case, the values of d_nz,o_nz are:
2528 .vb
2529      proc0 : d_nz = 2, o_nz = 2
2530      proc1 : d_nz = 3, o_nz = 2
2531      proc2 : d_nz = 1, o_nz = 4
2532 .ve
2533    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2534    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2535    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2536    34 values.
2537 
2538    When d_nnz, o_nnz parameters are specified, the storage is specified
2539    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2540    In the above case the values for d_nnz,o_nnz are:
2541 .vb
2542      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2543      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2544      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2545 .ve
2546    Here the space allocated is the sum of all the above values, i.e. 34, and
2547    hence the preallocation is perfect.
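   For the 8x8 example above, the call made on proc0 would be sketched as follows
   (each process passes its own local sizes and nnz arrays):
.vb
   int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve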
2548 
2549    Level: intermediate
2550 
2551 .keywords: matrix, aij, compressed row, sparse, parallel
2552 
2553 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2554 @*/
2555 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2556 {
2557   int ierr,size;
2558 
2559   PetscFunctionBegin;
2560   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2561   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2562   if (size > 1) {
2563     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2564     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2565   } else {
2566     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2567     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2568   }
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 #undef __FUNCT__
2573 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2574 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2575 {
2576   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2577   PetscFunctionBegin;
2578   *Ad     = a->A;
2579   *Ao     = a->B;
2580   *colmap = a->garray;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2586 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2587 {
2588   int        ierr,i;
2589   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2590 
2591   PetscFunctionBegin;
2592   if (coloring->ctype == IS_COLORING_LOCAL) {
2593     ISColoringValue *allcolors,*colors;
2594     ISColoring      ocoloring;
2595 
2596     /* set coloring for diagonal portion */
2597     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2598 
2599     /* set coloring for off-diagonal portion */
2600     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2601     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2602     for (i=0; i<a->B->n; i++) {
2603       colors[i] = allcolors[a->garray[i]];
2604     }
2605     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2606     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2607     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2608     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2609   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2610     ISColoringValue *colors;
2611     int             *larray;
2612     ISColoring      ocoloring;
2613 
2614     /* set coloring for diagonal portion */
2615     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2616     for (i=0; i<a->A->n; i++) {
2617       larray[i] = i + a->cstart;
2618     }
2619     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2620     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2621     for (i=0; i<a->A->n; i++) {
2622       colors[i] = coloring->colors[larray[i]];
2623     }
2624     ierr = PetscFree(larray);CHKERRQ(ierr);
2625     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2626     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2627     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2628 
2629     /* set coloring for off-diagonal portion */
2630     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2631     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2632     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2633     for (i=0; i<a->B->n; i++) {
2634       colors[i] = coloring->colors[larray[i]];
2635     }
2636     ierr = PetscFree(larray);CHKERRQ(ierr);
2637     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2638     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2639     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2640   } else {
2641     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2642   }
2643 
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2649 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2650 {
2651   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2652   int        ierr;
2653 
2654   PetscFunctionBegin;
2655   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2656   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 #undef __FUNCT__
2661 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2662 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2663 {
2664   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2665   int        ierr;
2666 
2667   PetscFunctionBegin;
2668   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2669   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2670   PetscFunctionReturn(0);
2671 }
2672 
2673 #undef __FUNCT__
2674 #define __FUNCT__ "MatMerge"
2675 /*@C
2676       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2677                  matrices from each processor
2678 
2679     Collective on MPI_Comm
2680 
2681    Input Parameters:
2682 +    comm - the communicator the parallel matrix will live on
2683 -    inmat - the input sequential matrices
2684 
2685    Output Parameter:
2686 .    outmat - the parallel matrix generated
2687 
2688     Level: advanced
2689 
2690    Notes: The number of columns of the matrices on EACH of the separate processors
2691       MUST be the same.
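   A sketch of the intended usage, where inmat is the sequential matrix owned by
   each process (note that inmat is destroyed by this routine and the result is
   always of type MATMPIAIJ):
.vb
   ierr = MatMerge(comm,inmat,&outmat);CHKERRQ(ierr);
.ve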
2692 
2693 @*/
2694 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2695 {
2696   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2697   PetscScalar *values;
2698   PetscMap    columnmap,rowmap;
2699 
2700   PetscFunctionBegin;
2701 
2702   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2703 
2704   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2705   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2706   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2707   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2708   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2709   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2710 
2711   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2712   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2713   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2714   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2715   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2716 
2717   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2718   for (i=0;i<m;i++) {
2719     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2720     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2721     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2722   }
2723   /* This routine will ONLY return MPIAIJ type matrix */
2724   ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2725   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2726   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2727   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2728 
2729   for (i=0;i<m;i++) {
2730     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2731     I    = i + rstart;
2732     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2733     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2734   }
2735   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2736   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2737   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2738 
2739   PetscFunctionReturn(0);
2740 }
2741 
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatFileSplit"
2744 int MatFileSplit(Mat A,char *outfile)
2745 {
2746   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2747   PetscViewer out;
2748   char        *name;
2749   Mat         B;
2750   PetscScalar *values;
2751 
2752   PetscFunctionBegin;
2753 
2754   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2755   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2756   /* Should this be the type of the diagonal block of A? */
2757   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2758   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2759   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2760   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2761   for (i=0;i<m;i++) {
2762     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2763     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2764     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2765   }
2766   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2767   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2768 
2769   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2770   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2771   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2772   sprintf(name,"%s.%d",outfile,rank);
2773   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2774   ierr = PetscFree(name);CHKERRQ(ierr);
2775   ierr = MatView(B,out);CHKERRQ(ierr);
2776   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2777   ierr = MatDestroy(B);CHKERRQ(ierr);
2778   PetscFunctionReturn(0);
2779 }
2780