xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision de3625564d23a58eb9a13fca7a33afdebe168b34)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
10 a slightly higher hash-table cost; without it, it is not scalable (each processor
11 has an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 int CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
18   int        n = aij->B->n,i,ierr;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(int));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
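/*
   A minimal sketch (not library code) of how the colmap built above is queried to
   turn a global column number gcol (a hypothetical variable) into a local column
   index lcol of the off-diagonal block; the +1/-1 shift lets a zero array entry or
   a failed table lookup mean "not present":

     int lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) {
       ... gcol is not (yet) a column of the off-diagonal block B ...
     }
*/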
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (int*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(int));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (int*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(int));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
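/*
   Both macros above implement the same sequential CSR insertion: a short binary
   search narrows the window of the row, a linear scan locates the column, and if
   the row is already full the i/j/a arrays are regrown by CHUNKSIZE.  A simplified
   sketch of the search-and-update step, using hypothetical non-macro names:

     low = 0; high = nrow;
     while (high-low > 5) {
       t = (low+high)/2;
       if (rp[t] > col) high = t;
       else             low  = t;
     }
     for (i=low; i<high; i++) {
       if (rp[i] == col) {
         if (addv == ADD_VALUES) ap[i] += value;
         else                    ap[i]  = value;
         break;
       }
     }
*/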
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar  value;
190   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
191   int          cstart = aij->cstart,cend = aij->cend,row,col;
192   PetscTruth   roworiented = aij->roworiented;
193 
194   /* Some variables required by the MatSetValues_SeqAIJ_*_Private macros */
195   Mat          A = aij->A;
196   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
197   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
198   PetscScalar  *aa = a->a;
199   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
200   Mat          B = aij->B;
201   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
202   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
203   PetscScalar  *ba = b->a;
204 
205   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
206   int          nonew = a->nonew,shift=0;
207   PetscScalar  *ap;
208 
209   PetscFunctionBegin;
210   for (i=0; i<m; i++) {
211     if (im[i] < 0) continue;
212 #if defined(PETSC_USE_BOPT_g)
213     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
214 #endif
215     if (im[i] >= rstart && im[i] < rend) {
216       row = im[i] - rstart;
217       for (j=0; j<n; j++) {
218         if (in[j] >= cstart && in[j] < cend){
219           col = in[j] - cstart;
220           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
221           if (ignorezeroentries && value == 0.0) continue;
222           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
223           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
224         } else if (in[j] < 0) continue;
225 #if defined(PETSC_USE_BOPT_g)
226         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
227 #endif
228         else {
229           if (mat->was_assembled) {
230             if (!aij->colmap) {
231               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
232             }
233 #if defined (PETSC_USE_CTABLE)
234             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
235 	    col--;
236 #else
237             col = aij->colmap[in[j]] - 1;
238 #endif
239             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
240               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
241               col =  in[j];
242               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
243               B = aij->B;
244               b = (Mat_SeqAIJ*)B->data;
245               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
246               ba = b->a;
247             }
248           } else col = in[j];
249           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
250           if (ignorezeroentries && value == 0.0) continue;
251           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
252           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
253         }
254       }
255     } else {
256       if (!aij->donotstash) {
257         if (roworiented) {
258           if (ignorezeroentries && v[i*n] == 0.0) continue;
259           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
260         } else {
261           if (ignorezeroentries && v[i] == 0.0) continue;
262           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
263         }
264       }
265     }
266   }
267   PetscFunctionReturn(0);
268 }
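/*
   Caller-side sketch of the routine above (assumes an MPIAIJ matrix A whose
   ownership range rstart..rend-1 was obtained with MatGetOwnershipRange); rows
   owned by other processes are stashed here and shipped during assembly:

     int         row = rstart;
     PetscScalar one = 1.0;
     ierr = MatSetValues(A,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/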
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatGetValues_MPIAIJ"
272 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
275   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
276   int        cstart = aij->cstart,cend = aij->cend,row,col;
277 
278   PetscFunctionBegin;
279   for (i=0; i<m; i++) {
280     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
281     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
282     if (idxm[i] >= rstart && idxm[i] < rend) {
283       row = idxm[i] - rstart;
284       for (j=0; j<n; j++) {
285         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
286         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
287         if (idxn[j] >= cstart && idxn[j] < cend){
288           col = idxn[j] - cstart;
289           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
290         } else {
291           if (!aij->colmap) {
292             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
293           }
294 #if defined (PETSC_USE_CTABLE)
295           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
296           col --;
297 #else
298           col = aij->colmap[idxn[j]] - 1;
299 #endif
300           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
301           else {
302             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
303           }
304         }
305       }
306     } else {
307       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
308     }
309   }
310   PetscFunctionReturn(0);
311 }
312 
313 #undef __FUNCT__
314 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
315 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
316 {
317   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
318   int         ierr,nstash,reallocs;
319   InsertMode  addv;
320 
321   PetscFunctionBegin;
322   if (aij->donotstash) {
323     PetscFunctionReturn(0);
324   }
325 
326   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
327   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
328   if (addv == (ADD_VALUES|INSERT_VALUES)) {
329     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
330   }
331   mat->insertmode = addv; /* in case this processor had no cache */
332 
333   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
334   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
335   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
336   PetscFunctionReturn(0);
337 }
338 
339 
340 #undef __FUNCT__
341 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
342 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
343 {
344   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
345   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
346   int         i,j,rstart,ncols,n,ierr,flg;
347   int         *row,*col,other_disassembled;
348   PetscScalar *val;
349   InsertMode  addv = mat->insertmode;
350 
351   PetscFunctionBegin;
352   if (!aij->donotstash) {
353     while (1) {
354       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
355       if (!flg) break;
356 
357       for (i=0; i<n;) {
358         /* Now identify the consecutive vals belonging to the same row */
359         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
360         if (j < n) ncols = j-i;
361         else       ncols = n-i;
362         /* Now assemble all these values with a single function call */
363         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
364         i = j;
365       }
366     }
367     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
368   }
369 
370   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
371   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
372 
373   /* determine if any processor has disassembled; if so, we must
374      also disassemble ourselves so that we may reassemble. */
375   /*
376      if the nonzero structure of submatrix B cannot change, then no processor
377      disassembled, so we can skip this step
378   */
379   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
380     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
381     if (mat->was_assembled && !other_disassembled) {
382       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
383     }
384   }
385 
386   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
387     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
388   }
389   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
390   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
391 
392   if (aij->rowvalues) {
393     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
394     aij->rowvalues = 0;
395   }
396 
397   /* used by MatAXPY() */
398   a->xtoy = 0; b->xtoy = 0;
399   a->XtoY = 0; b->XtoY = 0;
400 
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
406 int MatZeroEntries_MPIAIJ(Mat A)
407 {
408   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
409   int        ierr;
410 
411   PetscFunctionBegin;
412   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
413   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatZeroRows_MPIAIJ"
419 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
420 {
421   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
422   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
423   int            *nprocs,j,idx,nsends,row;
424   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
425   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
426   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
427   MPI_Comm       comm = A->comm;
428   MPI_Request    *send_waits,*recv_waits;
429   MPI_Status     recv_status,*send_status;
430   IS             istmp;
431   PetscTruth     found;
432 
433   PetscFunctionBegin;
434   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
435   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
436 
437   /*  first count number of contributors to each processor */
438   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
439   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
440   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
441   for (i=0; i<N; i++) {
442     idx = rows[i];
443     found = PETSC_FALSE;
444     for (j=0; j<size; j++) {
445       if (idx >= owners[j] && idx < owners[j+1]) {
446         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
447       }
448     }
449     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
450   }
451   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
452 
453   /* inform other processors of number of messages and max length*/
454   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
455 
456   /* post receives:   */
457   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
458   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
459   for (i=0; i<nrecvs; i++) {
460     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
461   }
462 
463   /* do sends:
464       1) starts[i] gives the starting index in svalues for stuff going to
465          the ith processor
466   */
467   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
468   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
469   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
470   starts[0] = 0;
471   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
472   for (i=0; i<N; i++) {
473     svalues[starts[owner[i]]++] = rows[i];
474   }
475   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
476 
477   starts[0] = 0;
478   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
479   count = 0;
480   for (i=0; i<size; i++) {
481     if (nprocs[2*i+1]) {
482       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
483     }
484   }
485   ierr = PetscFree(starts);CHKERRQ(ierr);
486 
487   base = owners[rank];
488 
489   /*  wait on receives */
490   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
491   source = lens + nrecvs;
492   count  = nrecvs; slen = 0;
493   while (count) {
494     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
495     /* unpack receives into our local space */
496     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
497     source[imdex]  = recv_status.MPI_SOURCE;
498     lens[imdex]    = n;
499     slen          += n;
500     count--;
501   }
502   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
503 
504   /* move the data into the send scatter */
505   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
506   count = 0;
507   for (i=0; i<nrecvs; i++) {
508     values = rvalues + i*nmax;
509     for (j=0; j<lens[i]; j++) {
510       lrows[count++] = values[j] - base;
511     }
512   }
513   ierr = PetscFree(rvalues);CHKERRQ(ierr);
514   ierr = PetscFree(lens);CHKERRQ(ierr);
515   ierr = PetscFree(owner);CHKERRQ(ierr);
516   ierr = PetscFree(nprocs);CHKERRQ(ierr);
517 
518   /* actually zap the local rows */
519   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
520   PetscLogObjectParent(A,istmp);
521 
522   /*
523         Zero the required rows. If the "diagonal block" of the matrix
524      is square and the user wishes to set the diagonal, we use separate
525      code so that MatSetValues() is not called for each diagonal entry, which
526      would allocate new memory, causing many mallocs and slowing things down.
527 
528        Contributed by: Mathew Knepley
529   */
530   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
531   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
532   if (diag && (l->A->M == l->A->N)) {
533     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
534   } else if (diag) {
535     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
536     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
537       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
538 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
539     }
540     for (i = 0; i < slen; i++) {
541       row  = lrows[i] + rstart;
542       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
543     }
544     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546   } else {
547     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
548   }
549   ierr = ISDestroy(istmp);CHKERRQ(ierr);
550   ierr = PetscFree(lrows);CHKERRQ(ierr);
551 
552   /* wait on sends */
553   if (nsends) {
554     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
555     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
556     ierr = PetscFree(send_status);CHKERRQ(ierr);
557   }
558   ierr = PetscFree(send_waits);CHKERRQ(ierr);
559   ierr = PetscFree(svalues);CHKERRQ(ierr);
560 
561   PetscFunctionReturn(0);
562 }
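/*
   Caller-side sketch of MatZeroRows() on an MPIAIJ matrix; row numbers are global,
   and each process lists the rows it wants zeroed (the routine above routes them to
   their owners).  The example below, with hypothetical values, zeroes global row 0
   and leaves 1.0 on its diagonal:

     IS          rowsIS;
     int         rows[1] = {0};
     PetscScalar one     = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,1,rows,&rowsIS);CHKERRQ(ierr);
     ierr = MatZeroRows(A,rowsIS,&one);CHKERRQ(ierr);
     ierr = ISDestroy(rowsIS);CHKERRQ(ierr);
*/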
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatMult_MPIAIJ"
566 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
567 {
568   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
569   int        ierr,nt;
570 
571   PetscFunctionBegin;
572   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
573   if (nt != A->n) {
574     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
575   }
576   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
577   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
578   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
579   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
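/*
   The multiply above overlaps communication with computation: while the ghost
   entries of xx are scattered into a->lvec, the diagonal block is applied, and
   the off-diagonal contribution is added once the scatter completes.  On each
   process this computes

     yy = A_diag * xx_local + B_off * lvec

   where lvec holds the off-processor entries of xx selected by a->Mvctx.
*/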
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatMultAdd_MPIAIJ"
585 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
586 {
587   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
588   int        ierr;
589 
590   PetscFunctionBegin;
591   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
593   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
594   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
595   PetscFunctionReturn(0);
596 }
597 
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
600 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
601 {
602   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
603   int        ierr;
604 
605   PetscFunctionBegin;
606   /* do nondiagonal part */
607   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
608   /* send it on its way */
609   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
610   /* do local part */
611   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
612   /* receive remote parts: note this assumes the values are not actually */
613   /* inserted in yy until the next line, which is true for this implementation */
614   /* but perhaps is not always true. */
615   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618 
619 EXTERN_C_BEGIN
620 #undef __FUNCT__
621 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
622 int MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
623 {
624   MPI_Comm comm;
625   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
626   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
627   IS         Me,Notme;
628   int        M,N,first,last,*notme,ntids,i, ierr;
629 
630   PetscFunctionBegin;
631 
632   /* Easy test: symmetric diagonal block */
633   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
634   ierr = MatIsTranspose(Adia,Bdia,f); CHKERRQ(ierr);
635   if (!*f) PetscFunctionReturn(0);
636   ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr);
637   ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr);
638   if (ntids==1) PetscFunctionReturn(0);
639 
640   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
641   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
642   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
643   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
644   for (i=0; i<first; i++) notme[i] = i;
645   for (i=last; i<M; i++) notme[i-last+first] = i;
646   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
647   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
648   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
649   Aoff = Aoffs[0];
650   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
651   Boff = Boffs[0];
652   ierr = MatIsTranspose(Aoff,Boff,f); CHKERRQ(ierr);
653   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
654   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
655   ierr = ISDestroy(Me); CHKERRQ(ierr);
656   ierr = ISDestroy(Notme); CHKERRQ(ierr);
657 
658   PetscFunctionReturn(0);
659 }
660 EXTERN_C_END
661 
662 #undef __FUNCT__
663 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
664 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
665 {
666   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
667   int        ierr;
668 
669   PetscFunctionBegin;
670   /* do nondiagonal part */
671   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
672   /* send it on its way */
673   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
674   /* do local part */
675   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
676   /* receive remote parts: note this assumes the values are not actually */
677   /* inserted in zz until the next line, which is true for this implementation */
678   /* but perhaps is not always true. */
679   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
680   PetscFunctionReturn(0);
681 }
682 
683 /*
684   This only works correctly for square matrices where the subblock A->A is the
685    diagonal block
686 */
687 #undef __FUNCT__
688 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
689 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
690 {
691   int        ierr;
692   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
693 
694   PetscFunctionBegin;
695   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
696   if (a->rstart != a->cstart || a->rend != a->cend) {
697     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
698   }
699   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatScale_MPIAIJ"
705 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
706 {
707   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
708   int        ierr;
709 
710   PetscFunctionBegin;
711   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
712   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatDestroy_MPIAIJ"
718 int MatDestroy_MPIAIJ(Mat mat)
719 {
720   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
721   int        ierr;
722 
723   PetscFunctionBegin;
724 #if defined(PETSC_USE_LOG)
725   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
726 #endif
727   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
728   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
729   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
730   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
731 #if defined (PETSC_USE_CTABLE)
732   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
733 #else
734   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
735 #endif
736   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
737   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
738   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
739   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
740   ierr = PetscFree(aij);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatView_MPIAIJ_Binary"
746 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
747 {
748   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
749   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
750   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
751   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
752   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
753   PetscScalar       *column_values;
754 
755   PetscFunctionBegin;
756   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
757   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
758   nz   = A->nz + B->nz;
759   if (rank == 0) {
760     header[0] = MAT_FILE_COOKIE;
761     header[1] = mat->M;
762     header[2] = mat->N;
763     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
764     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
765     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
766     /* get largest number of rows any processor has */
767     rlen = mat->m;
768     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
769     for (i=1; i<size; i++) {
770       rlen = PetscMax(rlen,range[i+1] - range[i]);
771     }
772   } else {
773     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
774     rlen = mat->m;
775   }
776 
777   /* load up the local row counts */
778   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
779   for (i=0; i<mat->m; i++) {
780     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
781   }
782 
783   /* store the row lengths to the file */
784   if (rank == 0) {
785     MPI_Status status;
786     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
787     for (i=1; i<size; i++) {
788       rlen = range[i+1] - range[i];
789       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
790       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
791     }
792   } else {
793     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
794   }
795   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
796 
797   /* load up the local column indices */
798   nzmax = nz; /* )th processor needs space a largest processor needs */
799   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
800   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
801   cnt  = 0;
802   for (i=0; i<mat->m; i++) {
803     for (j=B->i[i]; j<B->i[i+1]; j++) {
804       if ( (col = garray[B->j[j]]) > cstart) break;
805       column_indices[cnt++] = col;
806     }
807     for (k=A->i[i]; k<A->i[i+1]; k++) {
808       column_indices[cnt++] = A->j[k] + cstart;
809     }
810     for (; j<B->i[i+1]; j++) {
811       column_indices[cnt++] = garray[B->j[j]];
812     }
813   }
814   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
815 
816   /* store the column indices to the file */
817   if (rank == 0) {
818     MPI_Status status;
819     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
820     for (i=1; i<size; i++) {
821       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
823       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
824       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
825     }
826   } else {
827     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
828     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
829   }
830   ierr = PetscFree(column_indices);CHKERRQ(ierr);
831 
832   /* load up the local column values */
833   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
834   cnt  = 0;
835   for (i=0; i<mat->m; i++) {
836     for (j=B->i[i]; j<B->i[i+1]; j++) {
837       if ( garray[B->j[j]] > cstart) break;
838       column_values[cnt++] = B->a[j];
839     }
840     for (k=A->i[i]; k<A->i[i+1]; k++) {
841       column_values[cnt++] = A->a[k];
842     }
843     for (; j<B->i[i+1]; j++) {
844       column_values[cnt++] = B->a[j];
845     }
846   }
847   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
848 
849   /* store the column values to the file */
850   if (rank == 0) {
851     MPI_Status status;
852     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
853     for (i=1; i<size; i++) {
854       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
856       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
857       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
858     }
859   } else {
860     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
861     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
862   }
863   ierr = PetscFree(column_values);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
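/*
   Sketch of the binary layout produced above (integers written as PETSC_INT,
   values as PETSC_SCALAR, everything funnelled through and written by process 0):

     header[4]        MAT_FILE_COOKIE, M, N, global number of nonzeros
     row lengths      one count per global row, process 0 first, then each process in order
     column indices   global column indices row by row, with the diagonal block in place
     values           the matching numerical values in the same order
*/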
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
869 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870 {
871   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
872   int               ierr,rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,isascii,flg,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (isascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo info;
885       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
886       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
887       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
888       if (flg) {
889         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
890 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
891       } else {
892         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
893 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
894       }
895       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
896       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
897       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
899       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
900       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
901       PetscFunctionReturn(0);
902     } else if (format == PETSC_VIEWER_ASCII_INFO) {
903       PetscFunctionReturn(0);
904     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
905       PetscFunctionReturn(0);
906     }
907   } else if (isbinary) {
908     if (size == 1) {
909       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
910       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
911     } else {
912       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
913     }
914     PetscFunctionReturn(0);
915   } else if (isdraw) {
916     PetscDraw  draw;
917     PetscTruth isnull;
918     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
919     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
920   }
921 
922   if (size == 1) {
923     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
924     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
925   } else {
926     /* assemble the entire matrix onto first processor. */
927     Mat         A;
928     Mat_SeqAIJ *Aloc;
929     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
930     PetscScalar *a;
931 
932     if (!rank) {
933       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
934     } else {
935       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
936     }
937     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
938     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
939     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
940     PetscLogObjectParent(mat,A);
941 
942     /* copy over the A part */
943     Aloc = (Mat_SeqAIJ*)aij->A->data;
944     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
945     row = aij->rstart;
946     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
947     for (i=0; i<m; i++) {
948       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
949       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
950     }
951     aj = Aloc->j;
952     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
953 
954     /* copy over the B part */
955     Aloc = (Mat_SeqAIJ*)aij->B->data;
956     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
957     row  = aij->rstart;
958     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
959     ct   = cols;
960     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
961     for (i=0; i<m; i++) {
962       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
963       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
964     }
965     ierr = PetscFree(ct);CHKERRQ(ierr);
966     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
967     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968     /*
969        Everyone has to participate in the call so the matrix can be drawn, since the
970        graphics waits are synchronized across all processors that share the PetscDraw object
971     */
972     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
973     if (!rank) {
974       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
975       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
976     }
977     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
978     ierr = MatDestroy(A);CHKERRQ(ierr);
979   }
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatView_MPIAIJ"
985 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
986 {
987   int        ierr;
988   PetscTruth isascii,isdraw,issocket,isbinary;
989 
990   PetscFunctionBegin;
991   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
992   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
993   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
994   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
995   if (isascii || isdraw || isbinary || issocket) {
996     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
997   } else {
998     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatRelax_MPIAIJ"
1007 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1008 {
1009   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1010   int          ierr;
1011   Vec          bb1;
1012   PetscScalar  mone=-1.0;
1013 
1014   PetscFunctionBegin;
1015   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1016 
1017   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1018 
1019   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1020     if (flag & SOR_ZERO_INITIAL_GUESS) {
1021       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1022       its--;
1023     }
1024 
1025     while (its--) {
1026       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1027       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1028 
1029       /* update rhs: bb1 = bb - B*x */
1030       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1031       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1032 
1033       /* local sweep */
1034       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1035       CHKERRQ(ierr);
1036     }
1037   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1038     if (flag & SOR_ZERO_INITIAL_GUESS) {
1039       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1040       its--;
1041     }
1042     while (its--) {
1043       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1044       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1045 
1046       /* update rhs: bb1 = bb - B*x */
1047       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1048       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1049 
1050       /* local sweep */
1051       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1052       CHKERRQ(ierr);
1053     }
1054   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1055     if (flag & SOR_ZERO_INITIAL_GUESS) {
1056       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1057       its--;
1058     }
1059     while (its--) {
1060       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1061       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1062 
1063       /* update rhs: bb1 = bb - B*x */
1064       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1065       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1066 
1067       /* local sweep */
1068       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1069       CHKERRQ(ierr);
1070     }
1071   } else {
1072     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1073   }
1074 
1075   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
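/*
   Each outer iteration of the relaxation above builds a local right-hand side

     bb1 = bb - B * lvec

   (lvec holds the ghost entries of xx, scaled by -1 before the multadd) and then
   performs lits sweeps of the sequential relaxation with the diagonal block A.
   Only the SOR_LOCAL_* variants are supported; a true parallel sweep would need
   an ordering across processes.
*/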
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1081 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1082 {
1083   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1084   Mat        A = mat->A,B = mat->B;
1085   int        ierr;
1086   PetscReal  isend[5],irecv[5];
1087 
1088   PetscFunctionBegin;
1089   info->block_size     = 1.0;
1090   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1091   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1092   isend[3] = info->memory;  isend[4] = info->mallocs;
1093   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1094   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1095   isend[3] += info->memory;  isend[4] += info->mallocs;
1096   if (flag == MAT_LOCAL) {
1097     info->nz_used      = isend[0];
1098     info->nz_allocated = isend[1];
1099     info->nz_unneeded  = isend[2];
1100     info->memory       = isend[3];
1101     info->mallocs      = isend[4];
1102   } else if (flag == MAT_GLOBAL_MAX) {
1103     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1104     info->nz_used      = irecv[0];
1105     info->nz_allocated = irecv[1];
1106     info->nz_unneeded  = irecv[2];
1107     info->memory       = irecv[3];
1108     info->mallocs      = irecv[4];
1109   } else if (flag == MAT_GLOBAL_SUM) {
1110     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1111     info->nz_used      = irecv[0];
1112     info->nz_allocated = irecv[1];
1113     info->nz_unneeded  = irecv[2];
1114     info->memory       = irecv[3];
1115     info->mallocs      = irecv[4];
1116   }
1117   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1118   info->fill_ratio_needed = 0;
1119   info->factor_mallocs    = 0;
1120   info->rows_global       = (double)matin->M;
1121   info->columns_global    = (double)matin->N;
1122   info->rows_local        = (double)matin->m;
1123   info->columns_local     = (double)matin->N;
1124 
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatSetOption_MPIAIJ"
1130 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1131 {
1132   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1133   int        ierr;
1134 
1135   PetscFunctionBegin;
1136   switch (op) {
1137   case MAT_NO_NEW_NONZERO_LOCATIONS:
1138   case MAT_YES_NEW_NONZERO_LOCATIONS:
1139   case MAT_COLUMNS_UNSORTED:
1140   case MAT_COLUMNS_SORTED:
1141   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1142   case MAT_KEEP_ZEROED_ROWS:
1143   case MAT_NEW_NONZERO_LOCATION_ERR:
1144   case MAT_USE_INODES:
1145   case MAT_DO_NOT_USE_INODES:
1146   case MAT_IGNORE_ZERO_ENTRIES:
1147     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1148     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1149     break;
1150   case MAT_ROW_ORIENTED:
1151     a->roworiented = PETSC_TRUE;
1152     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1153     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1154     break;
1155   case MAT_ROWS_SORTED:
1156   case MAT_ROWS_UNSORTED:
1157   case MAT_YES_NEW_DIAGONALS:
1158     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1159     break;
1160   case MAT_COLUMN_ORIENTED:
1161     a->roworiented = PETSC_FALSE;
1162     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1163     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1164     break;
1165   case MAT_IGNORE_OFF_PROC_ENTRIES:
1166     a->donotstash = PETSC_TRUE;
1167     break;
1168   case MAT_NO_NEW_DIAGONALS:
1169     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1170   case MAT_SYMMETRIC:
1171   case MAT_STRUCTURALLY_SYMMETRIC:
1172   case MAT_NOT_SYMMETRIC:
1173   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1174   case MAT_HERMITIAN:
1175   case MAT_NOT_HERMITIAN:
1176   case MAT_SYMMETRY_ETERNAL:
1177   case MAT_NOT_SYMMETRY_ETERNAL:
1178     break;
1179   default:
1180     SETERRQ(PETSC_ERR_SUP,"unknown option");
1181   }
1182   PetscFunctionReturn(0);
1183 }
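/*
   Caller-side sketch: options are simply forwarded to both sequential blocks,
   except for the few handled directly above (row/column orientation and off-process
   stashing).  A typical use before setting values, assuming an MPIAIJ matrix A:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
*/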
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatGetRow_MPIAIJ"
1187 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1188 {
1189   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1190   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1191   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1192   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1193   int          *cmap,*idx_p;
1194 
1195   PetscFunctionBegin;
1196   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1197   mat->getrowactive = PETSC_TRUE;
1198 
1199   if (!mat->rowvalues && (idx || v)) {
1200     /*
1201         allocate enough space to hold information from the longest row.
1202     */
1203     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1204     int     max = 1,tmp;
1205     for (i=0; i<matin->m; i++) {
1206       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1207       if (max < tmp) { max = tmp; }
1208     }
1209     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1210     mat->rowindices = (int*)(mat->rowvalues + max);
1211   }
1212 
1213   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1214   lrow = row - rstart;
1215 
1216   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1217   if (!v)   {pvA = 0; pvB = 0;}
1218   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1219   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1220   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1221   nztot = nzA + nzB;
1222 
1223   cmap  = mat->garray;
1224   if (v  || idx) {
1225     if (nztot) {
1226       /* Sort by increasing column numbers, assuming A and B already sorted */
1227       int imark = -1;
1228       if (v) {
1229         *v = v_p = mat->rowvalues;
1230         for (i=0; i<nzB; i++) {
1231           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1232           else break;
1233         }
1234         imark = i;
1235         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1236         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1237       }
1238       if (idx) {
1239         *idx = idx_p = mat->rowindices;
1240         if (imark > -1) {
1241           for (i=0; i<imark; i++) {
1242             idx_p[i] = cmap[cworkB[i]];
1243           }
1244         } else {
1245           for (i=0; i<nzB; i++) {
1246             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1247             else break;
1248           }
1249           imark = i;
1250         }
1251         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1252         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1253       }
1254     } else {
1255       if (idx) *idx = 0;
1256       if (v)   *v   = 0;
1257     }
1258   }
1259   *nz = nztot;
1260   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1261   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1262   PetscFunctionReturn(0);
1263 }
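/*
   Caller-side sketch of MatGetRow()/MatRestoreRow() on an MPIAIJ matrix; only
   locally owned rows may be requested (row = rstart below is a hypothetical local
   row, e.g. from MatGetOwnershipRange), and the returned (global column, value)
   pairs are sorted by column as merged above:

     int         nz,*cols,row = rstart;
     PetscScalar *vals;
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ... examine the nz entries cols[k], vals[k] ...
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
*/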
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1267 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1268 {
1269   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1270 
1271   PetscFunctionBegin;
1272   if (aij->getrowactive == PETSC_FALSE) {
1273     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1274   }
1275   aij->getrowactive = PETSC_FALSE;
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatNorm_MPIAIJ"
1281 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1282 {
1283   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1284   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1285   int          ierr,i,j,cstart = aij->cstart;
1286   PetscReal    sum = 0.0;
1287   PetscScalar  *v;
1288 
1289   PetscFunctionBegin;
1290   if (aij->size == 1) {
1291     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1292   } else {
1293     if (type == NORM_FROBENIUS) {
1294       v = amat->a;
1295       for (i=0; i<amat->nz; i++) {
1296 #if defined(PETSC_USE_COMPLEX)
1297         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1298 #else
1299         sum += (*v)*(*v); v++;
1300 #endif
1301       }
1302       v = bmat->a;
1303       for (i=0; i<bmat->nz; i++) {
1304 #if defined(PETSC_USE_COMPLEX)
1305         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1306 #else
1307         sum += (*v)*(*v); v++;
1308 #endif
1309       }
1310       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1311       *norm = sqrt(*norm);
1312     } else if (type == NORM_1) { /* max column norm */
1313       PetscReal *tmp,*tmp2;
1314       int    *jj,*garray = aij->garray;
1315       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1316       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1317       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1318       *norm = 0.0;
1319       v = amat->a; jj = amat->j;
1320       for (j=0; j<amat->nz; j++) {
1321         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1322       }
1323       v = bmat->a; jj = bmat->j;
1324       for (j=0; j<bmat->nz; j++) {
1325         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1326       }
1327       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1328       for (j=0; j<mat->N; j++) {
1329         if (tmp2[j] > *norm) *norm = tmp2[j];
1330       }
1331       ierr = PetscFree(tmp);CHKERRQ(ierr);
1332       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1333     } else if (type == NORM_INFINITY) { /* max row norm */
1334       PetscReal ntemp = 0.0;
1335       for (j=0; j<aij->A->m; j++) {
1336         v = amat->a + amat->i[j];
1337         sum = 0.0;
1338         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1339           sum += PetscAbsScalar(*v); v++;
1340         }
1341         v = bmat->a + bmat->i[j];
1342         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1343           sum += PetscAbsScalar(*v); v++;
1344         }
1345         if (sum > ntemp) ntemp = sum;
1346       }
1347       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1348     } else {
1349       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1350     }
1351   }
1352   PetscFunctionReturn(0);
1353 }
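/*
   The norms computed above are, with a_ij the global entries,

     NORM_FROBENIUS   sqrt( sum_ij |a_ij|^2 )    summed across processes
     NORM_1           max_j sum_i |a_ij|         maximum column sum
     NORM_INFINITY    max_i sum_j |a_ij|         maximum row sum

   NORM_2 is not supported for this matrix type.
*/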
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatTranspose_MPIAIJ"
1357 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1358 {
1359   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1360   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1361   int          ierr;
1362   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1363   Mat          B;
1364   PetscScalar  *array;
1365 
1366   PetscFunctionBegin;
1367   if (!matout && M != N) {
1368     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1369   }
1370 
1371   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1372   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1373   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1374 
1375   /* copy over the A part */
1376   Aloc = (Mat_SeqAIJ*)a->A->data;
1377   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1378   row = a->rstart;
1379   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1380   for (i=0; i<m; i++) {
1381     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1382     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1383   }
1384   aj = Aloc->j;
1385   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1386 
1387   /* copy over the B part */
1388   Aloc = (Mat_SeqAIJ*)a->B->data;
1389   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1390   row  = a->rstart;
1391   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1392   ct   = cols;
1393   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1394   for (i=0; i<m; i++) {
1395     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1396     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1397   }
1398   ierr = PetscFree(ct);CHKERRQ(ierr);
1399   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1400   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1401   if (matout) {
1402     *matout = B;
1403   } else {
1404     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1411 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1412 {
1413   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1414   Mat        a = aij->A,b = aij->B;
1415   int        ierr,s1,s2,s3;
1416 
1417   PetscFunctionBegin;
1418   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1419   if (rr) {
1420     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1421     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1422     /* Overlap communication with computation. */
1423     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1424   }
1425   if (ll) {
1426     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1427     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1428     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1429   }
1430   /* scale  the diagonal block */
1431   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1432 
1433   if (rr) {
1434     /* Do a scatter end and then right scale the off-diagonal block */
1435     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1436     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1437   }
1438 
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1445 int MatPrintHelp_MPIAIJ(Mat A)
1446 {
1447   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1448   int        ierr;
1449 
1450   PetscFunctionBegin;
1451   if (!a->rank) {
1452     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1459 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1460 {
1461   PetscFunctionBegin;
1462   *bs = 1;
1463   PetscFunctionReturn(0);
1464 }
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1467 int MatSetUnfactored_MPIAIJ(Mat A)
1468 {
1469   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1470   int        ierr;
1471 
1472   PetscFunctionBegin;
1473   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatEqual_MPIAIJ"
1479 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1480 {
1481   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1482   Mat        a,b,c,d;
1483   PetscTruth flg;
1484   int        ierr;
1485 
1486   PetscFunctionBegin;
1487   a = matA->A; b = matA->B;
1488   c = matB->A; d = matB->B;
1489 
1490   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1491   if (flg == PETSC_TRUE) {
1492     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1493   }
1494   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatCopy_MPIAIJ"
1500 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1501 {
1502   int        ierr;
1503   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1504   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1505 
1506   PetscFunctionBegin;
1507   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1508   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1509     /* Because of the column compression in the off-processor part of the matrix a->B,
1510        the number of columns in a->B and b->B may be different; hence we cannot call
1511        MatCopy() directly on the two parts. If need be, a more efficient copy than
1512        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1513        then copying the submatrices */
1514     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1515   } else {
1516     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1517     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1518   }
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1524 int MatSetUpPreallocation_MPIAIJ(Mat A)
1525 {
1526   int        ierr;
1527 
1528   PetscFunctionBegin;
1529   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #include "petscblaslapack.h"
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatAXPY_MPIAIJ"
1536 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1537 {
1538   int        ierr,one=1,i;
1539   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1540   Mat_SeqAIJ *x,*y;
1541 
1542   PetscFunctionBegin;
1543   if (str == SAME_NONZERO_PATTERN) {
1544     x = (Mat_SeqAIJ *)xx->A->data;
1545     y = (Mat_SeqAIJ *)yy->A->data;
1546     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1547     x = (Mat_SeqAIJ *)xx->B->data;
1548     y = (Mat_SeqAIJ *)yy->B->data;
1549     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1550   } else if (str == SUBSET_NONZERO_PATTERN) {
1551     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1552 
1553     x = (Mat_SeqAIJ *)xx->B->data;
1554     y = (Mat_SeqAIJ *)yy->B->data;
1555     if (y->xtoy && y->XtoY != xx->B) {
1556       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1557       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1558     }
1559     if (!y->xtoy) { /* get xtoy */
1560       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1561       y->XtoY = xx->B;
1562     }
1563     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1564   } else {
1565     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 /* -------------------------------------------------------------------*/
1571 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1572        MatGetRow_MPIAIJ,
1573        MatRestoreRow_MPIAIJ,
1574        MatMult_MPIAIJ,
1575 /* 4*/ MatMultAdd_MPIAIJ,
1576        MatMultTranspose_MPIAIJ,
1577        MatMultTransposeAdd_MPIAIJ,
1578        0,
1579        0,
1580        0,
1581 /*10*/ 0,
1582        0,
1583        0,
1584        MatRelax_MPIAIJ,
1585        MatTranspose_MPIAIJ,
1586 /*15*/ MatGetInfo_MPIAIJ,
1587        MatEqual_MPIAIJ,
1588        MatGetDiagonal_MPIAIJ,
1589        MatDiagonalScale_MPIAIJ,
1590        MatNorm_MPIAIJ,
1591 /*20*/ MatAssemblyBegin_MPIAIJ,
1592        MatAssemblyEnd_MPIAIJ,
1593        0,
1594        MatSetOption_MPIAIJ,
1595        MatZeroEntries_MPIAIJ,
1596 /*25*/ MatZeroRows_MPIAIJ,
1597 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1598        MatLUFactorSymbolic_MPIAIJ_TFS,
1599 #else
1600        0,
1601 #endif
1602        0,
1603        0,
1604        0,
1605 /*30*/ MatSetUpPreallocation_MPIAIJ,
1606        0,
1607        0,
1608        0,
1609        0,
1610 /*35*/ MatDuplicate_MPIAIJ,
1611        0,
1612        0,
1613        0,
1614        0,
1615 /*40*/ MatAXPY_MPIAIJ,
1616        MatGetSubMatrices_MPIAIJ,
1617        MatIncreaseOverlap_MPIAIJ,
1618        MatGetValues_MPIAIJ,
1619        MatCopy_MPIAIJ,
1620 /*45*/ MatPrintHelp_MPIAIJ,
1621        MatScale_MPIAIJ,
1622        0,
1623        0,
1624        0,
1625 /*50*/ MatGetBlockSize_MPIAIJ,
1626        0,
1627        0,
1628        0,
1629        0,
1630 /*55*/ MatFDColoringCreate_MPIAIJ,
1631        0,
1632        MatSetUnfactored_MPIAIJ,
1633        0,
1634        0,
1635 /*60*/ MatGetSubMatrix_MPIAIJ,
1636        MatDestroy_MPIAIJ,
1637        MatView_MPIAIJ,
1638        MatGetPetscMaps_Petsc,
1639        0,
1640 /*65*/ 0,
1641        0,
1642        0,
1643        0,
1644        0,
1645 /*70*/ 0,
1646        0,
1647        MatSetColoring_MPIAIJ,
1648        MatSetValuesAdic_MPIAIJ,
1649        MatSetValuesAdifor_MPIAIJ,
1650 /*75*/ 0,
1651        0,
1652        0,
1653        0,
1654        0,
1655 /*80*/ 0,
1656        0,
1657        0,
1658        0,
1659 /*85*/ MatLoad_MPIAIJ};
1660 
1661 /* ----------------------------------------------------------------------------------------*/
1662 
1663 EXTERN_C_BEGIN
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1666 int MatStoreValues_MPIAIJ(Mat mat)
1667 {
1668   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1669   int        ierr;
1670 
1671   PetscFunctionBegin;
1672   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1673   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1674   PetscFunctionReturn(0);
1675 }
1676 EXTERN_C_END
1677 
1678 EXTERN_C_BEGIN
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1681 int MatRetrieveValues_MPIAIJ(Mat mat)
1682 {
1683   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1684   int        ierr;
1685 
1686   PetscFunctionBegin;
1687   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1688   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1689   PetscFunctionReturn(0);
1690 }
1691 EXTERN_C_END
1692 
1693 #include "petscpc.h"
1694 EXTERN_C_BEGIN
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1697 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1698 {
1699   Mat_MPIAIJ   *b;
1700   int          ierr,i;
1701 
1702   PetscFunctionBegin;
1703   B->preallocated = PETSC_TRUE;
1704   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1705   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1706   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1707   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1708   if (d_nnz) {
1709     for (i=0; i<B->m; i++) {
1710       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1711     }
1712   }
1713   if (o_nnz) {
1714     for (i=0; i<B->m; i++) {
1715       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1716     }
1717   }
1718   b = (Mat_MPIAIJ*)B->data;
1719   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1720   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1721 
1722   PetscFunctionReturn(0);
1723 }
1724 EXTERN_C_END
1725 
1726 /*MC
1727    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1728 
1729    Options Database Keys:
1730 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1731 
1732   Level: beginner
1733 
1734 .seealso: MatCreateMPIAIJ
1735 M*/
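
/*
   A minimal usage sketch: creating an MPIAIJ matrix explicitly rather than through
   -mat_type mpiaij. The global size of 100 and the per-row preallocation estimates
   (5 diagonal and 2 off-diagonal nonzeros per row) are illustrative assumptions only.

     Mat A;
     int ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/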
1736 
1737 EXTERN_C_BEGIN
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatCreate_MPIAIJ"
1740 int MatCreate_MPIAIJ(Mat B)
1741 {
1742   Mat_MPIAIJ *b;
1743   int        ierr,i,size;
1744 
1745   PetscFunctionBegin;
1746   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1747 
1748   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1749   B->data         = (void*)b;
1750   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1751   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1752   B->factor       = 0;
1753   B->assembled    = PETSC_FALSE;
1754   B->mapping      = 0;
1755 
1756   B->insertmode      = NOT_SET_VALUES;
1757   b->size            = size;
1758   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1759 
1760   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1761   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1762 
1763   /* the information in the maps duplicates the information computed below, eventually
1764      we should remove the duplicate information that is not contained in the maps */
1765   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1766   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1767 
1768   /* build local table of row and column ownerships */
1769   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1770   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1771   b->cowners = b->rowners + b->size + 2;
1772   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1773   b->rowners[0] = 0;
1774   for (i=2; i<=b->size; i++) {
1775     b->rowners[i] += b->rowners[i-1];
1776   }
1777   b->rstart = b->rowners[b->rank];
1778   b->rend   = b->rowners[b->rank+1];
1779   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1780   b->cowners[0] = 0;
1781   for (i=2; i<=b->size; i++) {
1782     b->cowners[i] += b->cowners[i-1];
1783   }
1784   b->cstart = b->cowners[b->rank];
1785   b->cend   = b->cowners[b->rank+1];
1786 
1787   /* build cache for off array entries formed */
1788   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1789   b->donotstash  = PETSC_FALSE;
1790   b->colmap      = 0;
1791   b->garray      = 0;
1792   b->roworiented = PETSC_TRUE;
1793 
1794   /* stuff used for matrix vector multiply */
1795   b->lvec      = PETSC_NULL;
1796   b->Mvctx     = PETSC_NULL;
1797 
1798   /* stuff for MatGetRow() */
1799   b->rowindices   = 0;
1800   b->rowvalues    = 0;
1801   b->getrowactive = PETSC_FALSE;
1802 
1803   /* Explicitly create 2 MATSEQAIJ matrices. */
1804   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1805   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1806   PetscLogObjectParent(B,b->A);
1807   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1808   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1809   PetscLogObjectParent(B,b->B);
1810 
1811   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1812                                      "MatStoreValues_MPIAIJ",
1813                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1815                                      "MatRetrieveValues_MPIAIJ",
1816                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1817   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1818 				     "MatGetDiagonalBlock_MPIAIJ",
1819                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1820   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1821 				     "MatIsTranspose_MPIAIJ",
1822 				     MatIsTranspose_MPIAIJ); CHKERRQ(ierr);
1823   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1824 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1825 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1826   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1827 				     "MatDiagonalScaleLocal_MPIAIJ",
1828 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 EXTERN_C_END
1832 
1833 /*MC
1834    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1835 
1836    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1837    and MATMPIAIJ otherwise.
1838 
1839    Options Database Keys:
1840 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1841 
1842   Level: beginner
1843 
1844 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1845 M*/
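
/*
   A minimal usage sketch: with MATAIJ the concrete type is chosen from the communicator
   size by MatCreate_AIJ() below (MATSEQAIJ on one process, MATMPIAIJ otherwise);
   alternatively, -mat_type aij together with MatSetFromOptions() selects it at runtime.
   The global size of 100 is an illustrative assumption only.

     Mat A;
     int ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
*/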
1846 
1847 EXTERN_C_BEGIN
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatCreate_AIJ"
1850 int MatCreate_AIJ(Mat A) {
1851   int ierr,size;
1852 
1853   PetscFunctionBegin;
1854   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1855   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1856   if (size == 1) {
1857     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1858   } else {
1859     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1860   }
1861   PetscFunctionReturn(0);
1862 }
1863 EXTERN_C_END
1864 
1865 #undef __FUNCT__
1866 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1867 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1868 {
1869   Mat        mat;
1870   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1871   int        ierr;
1872 
1873   PetscFunctionBegin;
1874   *newmat       = 0;
1875   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1876   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1877   a    = (Mat_MPIAIJ*)mat->data;
1878   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1879   mat->factor       = matin->factor;
1880   mat->assembled    = PETSC_TRUE;
1881   mat->insertmode   = NOT_SET_VALUES;
1882   mat->preallocated = PETSC_TRUE;
1883 
1884   a->rstart       = oldmat->rstart;
1885   a->rend         = oldmat->rend;
1886   a->cstart       = oldmat->cstart;
1887   a->cend         = oldmat->cend;
1888   a->size         = oldmat->size;
1889   a->rank         = oldmat->rank;
1890   a->donotstash   = oldmat->donotstash;
1891   a->roworiented  = oldmat->roworiented;
1892   a->rowindices   = 0;
1893   a->rowvalues    = 0;
1894   a->getrowactive = PETSC_FALSE;
1895 
1896   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1897   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1898   if (oldmat->colmap) {
1899 #if defined (PETSC_USE_CTABLE)
1900     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1901 #else
1902     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1903     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1904     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1905 #endif
1906   } else a->colmap = 0;
1907   if (oldmat->garray) {
1908     int len;
1909     len  = oldmat->B->n;
1910     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1911     PetscLogObjectMemory(mat,len*sizeof(int));
1912     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1913   } else a->garray = 0;
1914 
1915   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1916   PetscLogObjectParent(mat,a->lvec);
1917   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1918   PetscLogObjectParent(mat,a->Mvctx);
1919   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1920   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1921   PetscLogObjectParent(mat,a->A);
1922   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1923   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1924   PetscLogObjectParent(mat,a->B);
1925   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1926   *newmat = mat;
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 #include "petscsys.h"
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatLoad_MPIAIJ"
1934 int MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1935 {
1936   Mat          A;
1937   PetscScalar  *vals,*svals;
1938   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1939   MPI_Status   status;
1940   int          i,nz,ierr,j,rstart,rend,fd;
1941   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1942   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1943   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1944 
1945   PetscFunctionBegin;
1946   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1947   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1948   if (!rank) {
1949     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1950     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1951     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1952     if (header[3] < 0) {
1953       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1954     }
1955   }
1956 
1957   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1958   M = header[1]; N = header[2];
1959   /* determine ownership of all rows */
1960   m = M/size + ((M % size) > rank);
1961   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1962   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1963   rowners[0] = 0;
1964   for (i=2; i<=size; i++) {
1965     rowners[i] += rowners[i-1];
1966   }
1967   rstart = rowners[rank];
1968   rend   = rowners[rank+1];
1969 
1970   /* distribute row lengths to all processors */
1971   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1972   offlens = ourlens + (rend-rstart);
1973   if (!rank) {
1974     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1975     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1976     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1977     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1978     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1979     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1980   } else {
1981     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1982   }
1983 
1984   if (!rank) {
1985     /* calculate the number of nonzeros on each processor */
1986     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1987     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1988     for (i=0; i<size; i++) {
1989       for (j=rowners[i]; j< rowners[i+1]; j++) {
1990         procsnz[i] += rowlengths[j];
1991       }
1992     }
1993     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1994 
1995     /* determine max buffer needed and allocate it */
1996     maxnz = 0;
1997     for (i=0; i<size; i++) {
1998       maxnz = PetscMax(maxnz,procsnz[i]);
1999     }
2000     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2001 
2002     /* read in my part of the matrix column indices  */
2003     nz   = procsnz[0];
2004     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2005     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2006 
2007     /* read in everyone else's and ship them off */
2008     for (i=1; i<size; i++) {
2009       nz   = procsnz[i];
2010       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2011       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2012     }
2013     ierr = PetscFree(cols);CHKERRQ(ierr);
2014   } else {
2015     /* determine buffer space needed for message */
2016     nz = 0;
2017     for (i=0; i<m; i++) {
2018       nz += ourlens[i];
2019     }
2020     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2021 
2022     /* receive message of column indices*/
2023     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2024     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2025     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2026   }
2027 
2028   /* determine column ownership if matrix is not square */
2029   if (N != M) {
2030     n      = N/size + ((N % size) > rank);
2031     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2032     cstart = cend - n;
2033   } else {
2034     cstart = rstart;
2035     cend   = rend;
2036     n      = cend - cstart;
2037   }
2038 
2039   /* loop over local rows, determining number of off diagonal entries */
2040   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2041   jj = 0;
2042   for (i=0; i<m; i++) {
2043     for (j=0; j<ourlens[i]; j++) {
2044       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2045       jj++;
2046     }
2047   }
2048 
2049   /* create our matrix */
2050   for (i=0; i<m; i++) {
2051     ourlens[i] -= offlens[i];
2052   }
2053   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2054   ierr = MatSetType(A,type);CHKERRQ(ierr);
2055   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2056 
2057   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2058   for (i=0; i<m; i++) {
2059     ourlens[i] += offlens[i];
2060   }
2061 
2062   if (!rank) {
2063     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2064 
2065     /* read in my part of the matrix numerical values  */
2066     nz   = procsnz[0];
2067     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2068 
2069     /* insert into matrix */
2070     jj      = rstart;
2071     smycols = mycols;
2072     svals   = vals;
2073     for (i=0; i<m; i++) {
2074       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2075       smycols += ourlens[i];
2076       svals   += ourlens[i];
2077       jj++;
2078     }
2079 
2080     /* read in other processors and ship out */
2081     for (i=1; i<size; i++) {
2082       nz   = procsnz[i];
2083       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2084       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2085     }
2086     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2087   } else {
2088     /* receive numeric values */
2089     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2090 
2091     /* receive message of values*/
2092     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2093     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2094     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2095 
2096     /* insert into matrix */
2097     jj      = rstart;
2098     smycols = mycols;
2099     svals   = vals;
2100     for (i=0; i<m; i++) {
2101       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2102       smycols += ourlens[i];
2103       svals   += ourlens[i];
2104       jj++;
2105     }
2106   }
2107   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2108   ierr = PetscFree(vals);CHKERRQ(ierr);
2109   ierr = PetscFree(mycols);CHKERRQ(ierr);
2110   ierr = PetscFree(rowners);CHKERRQ(ierr);
2111 
2112   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2113   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2114   *newmat = A;
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2120 /*
2121     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2122   copy, and then the final result formed by concatenating the local matrices.
2123   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2124 */
2125 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2126 {
2127   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2128   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2129   Mat          *local,M,Mreuse;
2130   PetscScalar  *vwork,*aa;
2131   MPI_Comm     comm = mat->comm;
2132   Mat_SeqAIJ   *aij;
2133 
2134 
2135   PetscFunctionBegin;
2136   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2137   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2138 
2139   if (call ==  MAT_REUSE_MATRIX) {
2140     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2141     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2142     local = &Mreuse;
2143     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2144   } else {
2145     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2146     Mreuse = *local;
2147     ierr   = PetscFree(local);CHKERRQ(ierr);
2148   }
2149 
2150   /*
2151       m - number of local rows
2152       n - number of columns (same on all processors)
2153       rstart - first row in new global matrix generated
2154   */
2155   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2156   if (call == MAT_INITIAL_MATRIX) {
2157     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2158     ii  = aij->i;
2159     jj  = aij->j;
2160 
2161     /*
2162         Determine the number of non-zeros in the diagonal and off-diagonal
2163         portions of the matrix in order to do correct preallocation
2164     */
2165 
2166     /* first get start and end of "diagonal" columns */
2167     if (csize == PETSC_DECIDE) {
2168       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2169       if (mglobal == n) { /* square matrix */
2170 	nlocal = m;
2171       } else {
2172         nlocal = n/size + ((n % size) > rank);
2173       }
2174     } else {
2175       nlocal = csize;
2176     }
2177     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2178     rstart = rend - nlocal;
2179     if (rank == size - 1 && rend != n) {
2180       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2181     }
2182 
2183     /* next, compute all the lengths */
2184     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2185     olens = dlens + m;
2186     for (i=0; i<m; i++) {
2187       jend = ii[i+1] - ii[i];
2188       olen = 0;
2189       dlen = 0;
2190       for (j=0; j<jend; j++) {
2191         if (*jj < rstart || *jj >= rend) olen++;
2192         else dlen++;
2193         jj++;
2194       }
2195       olens[i] = olen;
2196       dlens[i] = dlen;
2197     }
2198     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2199     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2200     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2201     ierr = PetscFree(dlens);CHKERRQ(ierr);
2202   } else {
2203     int ml,nl;
2204 
2205     M = *newmat;
2206     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2207     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2208     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2209     /*
2210          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2211        rather than the slower MatSetValues().
2212     */
2213     M->was_assembled = PETSC_TRUE;
2214     M->assembled     = PETSC_FALSE;
2215   }
2216   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2217   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2218   ii  = aij->i;
2219   jj  = aij->j;
2220   aa  = aij->a;
2221   for (i=0; i<m; i++) {
2222     row   = rstart + i;
2223     nz    = ii[i+1] - ii[i];
2224     cwork = jj;     jj += nz;
2225     vwork = aa;     aa += nz;
2226     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2227   }
2228 
2229   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2230   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2231   *newmat = M;
2232 
2233   /* save submatrix used in processor for next request */
2234   if (call ==  MAT_INITIAL_MATRIX) {
2235     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2236     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2237   }
2238 
2239   PetscFunctionReturn(0);
2240 }
2241 
2242 #undef __FUNCT__
2243 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2244 /*@C
2245    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2246    (the default parallel PETSc format).  For good matrix assembly performance
2247    the user should preallocate the matrix storage by setting the parameters
2248    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2249    performance can be increased by more than a factor of 50.
2250 
2251    Collective on MPI_Comm
2252 
2253    Input Parameters:
2254 +  A - the matrix
2255 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2256            (same value is used for all local rows)
2257 .  d_nnz - array containing the number of nonzeros in the various rows of the
2258            DIAGONAL portion of the local submatrix (possibly different for each row)
2259            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2260            The size of this array is equal to the number of local rows, i.e., 'm'.
2261            You must leave room for the diagonal entry even if it is zero.
2262 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2263            submatrix (same value is used for all local rows).
2264 -  o_nnz - array containing the number of nonzeros in the various rows of the
2265            OFF-DIAGONAL portion of the local submatrix (possibly different for
2266            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2267            structure. The size of this array is equal to the number
2268            of local rows, i.e., 'm'.
2269 
2270    The AIJ format (also called the Yale sparse matrix format or
2271    compressed row storage) is fully compatible with standard Fortran 77
2272    storage.  That is, the stored row and column indices can begin at
2273    either one (as in Fortran) or zero.  See the users manual for details.
2274 
2275    The user MUST specify either the local or global matrix dimensions
2276    (possibly both).
2277 
2278    The parallel matrix is partitioned such that the first m0 rows belong to
2279    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2280    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2281 
2282    The DIAGONAL portion of the local submatrix of a processor can be defined
2283    as the submatrix which is obtained by extracting the part corresponding
2284    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2285    first row that belongs to the processor, and r2 is the last row belonging
2286    to this processor. This is a square mxm matrix. The remaining portion
2287    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2288 
2289    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2290 
2291    By default, this format uses inodes (identical nodes) when possible.
2292    We search for consecutive rows with the same nonzero structure, thereby
2293    reusing matrix information to achieve increased efficiency.
2294 
2295    Options Database Keys:
2296 +  -mat_aij_no_inode  - Do not use inodes
2297 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2298 -  -mat_aij_oneindex - Internally use indexing starting at 1
2299         rather than 0.  Note that when calling MatSetValues(),
2300         the user still MUST index entries starting at 0!
2301 
2302    Example usage:
2303 
2304    Consider the following 8x8 matrix with 34 non-zero values that is
2305    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2306    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2307    as follows:
2308 
2309 .vb
2310             1  2  0  |  0  3  0  |  0  4
2311     Proc0   0  5  6  |  7  0  0  |  8  0
2312             9  0 10  | 11  0  0  | 12  0
2313     -------------------------------------
2314            13  0 14  | 15 16 17  |  0  0
2315     Proc1   0 18  0  | 19 20 21  |  0  0
2316             0  0  0  | 22 23  0  | 24  0
2317     -------------------------------------
2318     Proc2  25 26 27  |  0  0 28  | 29  0
2319            30  0  0  | 31 32 33  |  0 34
2320 .ve
2321 
2322    This can be represented as a collection of submatrices as:
2323 
2324 .vb
2325       A B C
2326       D E F
2327       G H I
2328 .ve
2329 
2330    Where the submatrices A,B,C are owned by proc0, D,E,F are
2331    owned by proc1, G,H,I are owned by proc2.
2332 
2333    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2334    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2335    The 'M','N' parameters are 8,8, and have the same values on all procs.
2336 
2337    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2338    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2339    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2340    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2341    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2342    matrix, and [DF] as another SeqAIJ matrix.
2343 
2344    When d_nz, o_nz parameters are specified, d_nz storage elements are
2345    allocated for every row of the local diagonal submatrix, and o_nz
2346    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2347    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2348    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2349    In this case, the values of d_nz,o_nz are:
2350 .vb
2351      proc0 : dnz = 2, o_nz = 2
2352      proc1 : dnz = 3, o_nz = 2
2353      proc2 : dnz = 1, o_nz = 4
2354 .ve
2355    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2356    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2357    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2358    34 values.
2359 
2360    When d_nnz, o_nnz parameters are specified, the storage is specified
2361    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2362    In the above case the values for d_nnz,o_nnz are:
2363 .vb
2364      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2365      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2366      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2367 .ve
2368    Here the space allocated is the sum of all the above values, i.e., 34, and
2369    hence the preallocation is exact.
2370 
2371    Level: intermediate
2372 
2373 .keywords: matrix, aij, compressed row, sparse, parallel
2374 
2375 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2376 @*/
2377 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2378 {
2379   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2380 
2381   PetscFunctionBegin;
2382   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2383   if (f) {
2384     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2385   }
2386   PetscFunctionReturn(0);
2387 }
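
/*
   A short sketch tying the arguments above to the 8x8 example in the manual page;
   the local sizes and the d_nnz/o_nnz arrays shown are the proc0 values of that
   example (run on 3 processes) and are illustrative assumptions only.

     Mat A;
     int ierr;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ... MatSetValues() on the locally owned rows ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/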
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatCreateMPIAIJ"
2391 /*@C
2392    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2393    (the default parallel PETSc format).  For good matrix assembly performance
2394    the user should preallocate the matrix storage by setting the parameters
2395    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2396    performance can be increased by more than a factor of 50.
2397 
2398    Collective on MPI_Comm
2399 
2400    Input Parameters:
2401 +  comm - MPI communicator
2402 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2403            This value should be the same as the local size used in creating the
2404            y vector for the matrix-vector product y = Ax.
2405 .  n - This value should be the same as the local size used in creating the
2406        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
2407        calculated if N is given) For square matrices n is almost always m.
2408 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2409 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2410 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2411            (same value is used for all local rows)
2412 .  d_nnz - array containing the number of nonzeros in the various rows of the
2413            DIAGONAL portion of the local submatrix (possibly different for each row)
2414            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2415            The size of this array is equal to the number of local rows, i.e., 'm'.
2416            You must leave room for the diagonal entry even if it is zero.
2417 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2418            submatrix (same value is used for all local rows).
2419 -  o_nnz - array containing the number of nonzeros in the various rows of the
2420            OFF-DIAGONAL portion of the local submatrix (possibly different for
2421            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2422            structure. The size of this array is equal to the number
2423            of local rows, i.e., 'm'.
2424 
2425    Output Parameter:
2426 .  A - the matrix
2427 
2428    Notes:
2429    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2430    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2431    storage requirements for this matrix.
2432 
2433    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2434    processor then it must be used on all processors that share the object for
2435    that argument.
2436 
2437    The AIJ format (also called the Yale sparse matrix format or
2438    compressed row storage) is fully compatible with standard Fortran 77
2439    storage.  That is, the stored row and column indices can begin at
2440    either one (as in Fortran) or zero.  See the users manual for details.
2441 
2442    The user MUST specify either the local or global matrix dimensions
2443    (possibly both).
2444 
2445    The parallel matrix is partitioned such that the first m0 rows belong to
2446    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2447    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2448 
2449    The DIAGONAL portion of the local submatrix of a processor can be defined
2450    as the submatrix which is obtained by extracting the part corresponding
2451    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2452    first row that belongs to the processor, and r2 is the last row belonging
2453    to this processor. This is a square mxm matrix. The remaining portion
2454    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2455 
2456    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2457 
2458    When calling this routine with a single process communicator, a matrix of
2459    type MATSEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
2460    type of communicator, use the construction mechanism:
2461      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2462 
2463    By default, this format uses inodes (identical nodes) when possible.
2464    We search for consecutive rows with the same nonzero structure, thereby
2465    reusing matrix information to achieve increased efficiency.
2466 
2467    Options Database Keys:
2468 +  -mat_aij_no_inode  - Do not use inodes
2469 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2470 -  -mat_aij_oneindex - Internally use indexing starting at 1
2471         rather than 0.  Note that when calling MatSetValues(),
2472         the user still MUST index entries starting at 0!
2473 
2474 
2475    Example usage:
2476 
2477    Consider the following 8x8 matrix with 34 non-zero values that is
2478    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2479    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2480    as follows:
2481 
2482 .vb
2483             1  2  0  |  0  3  0  |  0  4
2484     Proc0   0  5  6  |  7  0  0  |  8  0
2485             9  0 10  | 11  0  0  | 12  0
2486     -------------------------------------
2487            13  0 14  | 15 16 17  |  0  0
2488     Proc1   0 18  0  | 19 20 21  |  0  0
2489             0  0  0  | 22 23  0  | 24  0
2490     -------------------------------------
2491     Proc2  25 26 27  |  0  0 28  | 29  0
2492            30  0  0  | 31 32 33  |  0 34
2493 .ve
2494 
2495    This can be represented as a collection of submatrices as:
2496 
2497 .vb
2498       A B C
2499       D E F
2500       G H I
2501 .ve
2502 
2503    Where the submatrices A,B,C are owned by proc0, D,E,F are
2504    owned by proc1, G,H,I are owned by proc2.
2505 
2506    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2507    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2508    The 'M','N' parameters are 8,8, and have the same values on all procs.
2509 
2510    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2511    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2512    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2513    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2514    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2515    matrix, and [DF] as another SeqAIJ matrix.
2516 
2517    When d_nz, o_nz parameters are specified, d_nz storage elements are
2518    allocated for every row of the local diagonal submatrix, and o_nz
2519    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2520    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2521    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2522    In this case, the values of d_nz,o_nz are:
2523 .vb
2524      proc0 : dnz = 2, o_nz = 2
2525      proc1 : dnz = 3, o_nz = 2
2526      proc2 : dnz = 1, o_nz = 4
2527 .ve
2528    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2529    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2530    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2531    34 values.
2532 
2533    When d_nnz, o_nnz parameters are specified, the storage is specified
2534    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2535    In the above case the values for d_nnz,o_nnz are:
2536 .vb
2537      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2538      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2539      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2540 .ve
2541    Here the space allocated is the sum of all the above values, i.e., 34, and
2542    hence the preallocation is exact.
2543 
2544    Level: intermediate
2545 
2546 .keywords: matrix, aij, compressed row, sparse, parallel
2547 
2548 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2549 @*/
2550 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2551 {
2552   int ierr,size;
2553 
2554   PetscFunctionBegin;
2555   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2556   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2557   if (size > 1) {
2558     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2559     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2560   } else {
2561     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2562     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2563   }
2564   PetscFunctionReturn(0);
2565 }
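
/*
   A minimal usage sketch of the convenience constructor above; the single call replaces
   the MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence. The global size of
   100 and the constant per-row estimates (5 diagonal, 2 off-diagonal nonzeros) are
   illustrative assumptions only.

     Mat A;
     int ierr;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/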
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2569 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2570 {
2571   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2572   PetscFunctionBegin;
2573   *Ad     = a->A;
2574   *Ao     = a->B;
2575   *colmap = a->garray;
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 #undef __FUNCT__
2580 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2581 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2582 {
2583   int        ierr,i;
2584   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2585 
2586   PetscFunctionBegin;
2587   if (coloring->ctype == IS_COLORING_LOCAL) {
2588     ISColoringValue *allcolors,*colors;
2589     ISColoring      ocoloring;
2590 
2591     /* set coloring for diagonal portion */
2592     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2593 
2594     /* set coloring for off-diagonal portion */
2595     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2596     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2597     for (i=0; i<a->B->n; i++) {
2598       colors[i] = allcolors[a->garray[i]];
2599     }
2600     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2601     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2602     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2603     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2604   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2605     ISColoringValue *colors;
2606     int             *larray;
2607     ISColoring      ocoloring;
2608 
2609     /* set coloring for diagonal portion */
2610     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2611     for (i=0; i<a->A->n; i++) {
2612       larray[i] = i + a->cstart;
2613     }
2614     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2615     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2616     for (i=0; i<a->A->n; i++) {
2617       colors[i] = coloring->colors[larray[i]];
2618     }
2619     ierr = PetscFree(larray);CHKERRQ(ierr);
2620     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2621     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2622     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2623 
2624     /* set coloring for off-diagonal portion */
2625     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2626     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2627     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2628     for (i=0; i<a->B->n; i++) {
2629       colors[i] = coloring->colors[larray[i]];
2630     }
2631     ierr = PetscFree(larray);CHKERRQ(ierr);
2632     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2633     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2634     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2635   } else {
2636     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2637   }
2638 
2639   PetscFunctionReturn(0);
2640 }
2641 
2642 #undef __FUNCT__
2643 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2644 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2645 {
2646   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2647   int        ierr;
2648 
2649   PetscFunctionBegin;
2650   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2651   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2652   PetscFunctionReturn(0);
2653 }
2654 
2655 #undef __FUNCT__
2656 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2657 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2658 {
2659   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2660   int        ierr;
2661 
2662   PetscFunctionBegin;
2663   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2664   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 #undef __FUNCT__
2669 #define __FUNCT__ "MatMerge"
2670 /*@C
2671       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2672                  matrices from each processor
2673 
2674     Collective on MPI_Comm
2675 
2676    Input Parameters:
2677 +    comm - the communicator the parallel matrix will live on
2678 -    inmat - the input sequential matrices
2679 
2680    Output Parameter:
2681 .    outmat - the parallel matrix generated
2682 
2683     Level: advanced
2684 
2685    Notes: The number of columns of the input matrix on EACH of the separate
2686       processes MUST be the same.
2687 
2688 @*/
2689 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2690 {
2691   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2692   PetscScalar *values;
2693   PetscMap    columnmap,rowmap;
2694 
2695   PetscFunctionBegin;
2696 
2697   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2698 
2699   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2700   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2701   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2702   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2703   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2704   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2705 
2706   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2707   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2708   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2709   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2710   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2711 
2712   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2713   for (i=0;i<m;i++) {
2714     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2715     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2716     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2717   }
2718   /* This routine will ONLY return an MPIAIJ type matrix */
2719   ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2720   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2721   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2722   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2723 
2724   for (i=0;i<m;i++) {
2725     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2726     I    = i + rstart;
2727     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2728     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2729   }
2730   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2731   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2732   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2733 
2734   PetscFunctionReturn(0);
2735 }
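
/*
   A minimal usage sketch of MatMerge(); each process supplies its own sequential matrix
   (here an illustrative SeqAIJ with nlocal rows and N columns, N identical on all
   processes; nlocal and N are assumed to be set elsewhere). Note that MatMerge()
   destroys the input matrix.

     Mat seq,par;
     int ierr;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nlocal,N,0,PETSC_NULL,&seq);CHKERRQ(ierr);
     ... fill seq with MatSetValues() and assemble it ...
     ierr = MatMerge(PETSC_COMM_WORLD,seq,&par);CHKERRQ(ierr);
*/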
2736 
2737 #undef __FUNCT__
2738 #define __FUNCT__ "MatFileSplit"
2739 int MatFileSplit(Mat A,char *outfile)
2740 {
2741   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2742   PetscViewer out;
2743   char        *name;
2744   Mat         B;
2745   PetscScalar *values;
2746 
2747   PetscFunctionBegin;
2748 
2749   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2750   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2751   /* Should this be the type of the diagonal block of A? */
2752   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2753   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2754   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2755   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2756   for (i=0;i<m;i++) {
2757     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2758     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2759     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2760   }
2761   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2762   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2763 
2764   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2765   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2766   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2767   sprintf(name,"%s.%d",outfile,rank);
2768   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2769   ierr = PetscFree(name);CHKERRQ(ierr);
2770   ierr = MatView(B,out);CHKERRQ(ierr);
2771   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2772   ierr = MatDestroy(B);CHKERRQ(ierr);
2773   PetscFunctionReturn(0);
2774 }
2775