xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 292f8084fb157dadf9a2ae26c5bd14368ed7ffcb)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it, it is not scalable (each processor
10 has an order N integer array) but is fast to access.
11 */
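/*
   A minimal sketch of how the colmap built below is consulted later in this
   file (for example in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()) to
   translate a global column index into a local off-diagonal column index;
   gcol and lcol are hypothetical names, and a result < 0 means the global
   column is not present in the off-diagonal block.

     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;                            (indices are stored shifted by one)
     #else
       lcol = aij->colmap[gcol] - 1;      (order mat->N array, zero means absent)
     #endif
*/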
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   int        n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(int));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (int*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(int));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (int*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(int));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
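/*
   The two macros above share one insertion scheme: narrow the search window in
   the sorted row with a short binary search, finish with a linear scan, add to
   or overwrite an existing entry, and otherwise grow the row storage by
   CHUNKSIZE and shift the later entries up by one slot.  The block below is a
   minimal, hypothetical sketch of that scheme on a single standalone row; it is
   compiled out, uses realloc()/memmove() purely for brevity, and all names in
   it are made up.
*/
#if 0
typedef struct {
  int         *cols;    /* sorted column indices, length nz */
  PetscScalar *vals;    /* matching values */
  int          nz;      /* entries currently stored */
  int          maxnz;   /* entries allocated */
} SketchRow;

static int SketchRowInsert(SketchRow *r,int col,PetscScalar value,InsertMode addv)
{
  int low = 0,high = r->nz,t,_i;

  /* binary search narrows the window; a short linear scan finishes the search */
  while (high-low > 5) {
    t = (low+high)/2;
    if (r->cols[t] > col) high = t;
    else                  low  = t;
  }
  for (_i=low; _i<high; _i++) {
    if (r->cols[_i] > col) break;
    if (r->cols[_i] == col) {             /* existing location: add or overwrite */
      if (addv == ADD_VALUES) r->vals[_i] += value;
      else                    r->vals[_i]  = value;
      return 0;
    }
  }
  if (r->nz >= r->maxnz) {                /* row is full: grow by a fixed chunk */
    r->maxnz += CHUNKSIZE;
    r->cols   = (int*)realloc(r->cols,r->maxnz*sizeof(int));
    r->vals   = (PetscScalar*)realloc(r->vals,r->maxnz*sizeof(PetscScalar));
  }
  /* shift the later entries up one slot and insert at position _i */
  memmove(r->cols+_i+1,r->cols+_i,(r->nz-_i)*sizeof(int));
  memmove(r->vals+_i+1,r->vals+_i,(r->nz-_i)*sizeof(PetscScalar));
  r->cols[_i] = col; r->vals[_i] = value; r->nz++;
  return 0;
}
#endif
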
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar  value;
190   PetscErrorCode ierr;
191   int          i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros above */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   int        i,j,rstart = aij->rstart,rend = aij->rend;
278   int        cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   int         nstash,reallocs;
322   InsertMode  addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   int         i,j,rstart,ncols,n,flg;
351   int         *row,*col,other_disassembled;
352   PetscScalar *val;
353   InsertMode  addv = mat->insertmode;
354 
355   PetscFunctionBegin;
356   if (!aij->donotstash) {
357     while (1) {
358       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
359       if (!flg) break;
360 
361       for (i=0; i<n;) {
362         /* Now identify the consecutive vals belonging to the same row */
363         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
364         if (j < n) ncols = j-i;
365         else       ncols = n-i;
366         /* Now assemble all these values with a single function call */
367         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
368         i = j;
369       }
370     }
371     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
372   }
373 
374   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
376 
377   /* determine if any processor has disassembled, if so we must
378      also disassemble ourselves, in order that we may reassemble. */
379   /*
380      if nonzero structure of submatrix B cannot change then we know that
381      no processor disassembled and thus we can skip this step
382   */
383   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
384     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
385     if (mat->was_assembled && !other_disassembled) {
386       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
387     }
388   }
389 
390   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
391     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
392   }
393   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
395 
396   if (aij->rowvalues) {
397     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
398     aij->rowvalues = 0;
399   }
400 
401   /* used by MatAXPY() */
402   a->xtoy = 0; b->xtoy = 0;
403   a->XtoY = 0; b->XtoY = 0;
404 
405   PetscFunctionReturn(0);
406 }
407 
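/*
   A minimal caller-side sketch of the assembly flow that MatSetValues_MPIAIJ(),
   MatAssemblyBegin_MPIAIJ() and MatAssemblyEnd_MPIAIJ() above implement: values
   destined for rows owned by another process are cached in mat->stash by
   MatSetValues() and are only communicated and inserted during assembly.  The
   block is compiled out; the sizes, indices and values in it are made up.
*/
#if 0
static PetscErrorCode SketchAssembly(MPI_Comm comm)
{
  Mat            A;
  PetscErrorCode ierr;
  int            row = 0,col = 0;   /* global indices; the row may be off-process */
  PetscScalar    v   = 1.0;

  PetscFunctionBegin;
  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,10,10,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);

  /* off-process entries are cached in the stash here ... */
  ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);

  /* ... and exchanged and inserted into the local A and B blocks here */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
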
408 #undef __FUNCT__
409 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
411 {
412   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413   PetscErrorCode ierr;
414 
415   PetscFunctionBegin;
416   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
417   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatZeroRows_MPIAIJ"
423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
424 {
425   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
426   PetscErrorCode ierr;
427   int            i,N,*rows,*owners = l->rowners,size = l->size;
428   int            *nprocs,j,idx,nsends,row;
429   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
430   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
431   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
432   MPI_Comm       comm = A->comm;
433   MPI_Request    *send_waits,*recv_waits;
434   MPI_Status     recv_status,*send_status;
435   IS             istmp;
436   PetscTruth     found;
437 
438   PetscFunctionBegin;
439   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
440   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
441 
442   /*  first count number of contributors to each processor */
443   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
444   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
445   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
446   for (i=0; i<N; i++) {
447     idx = rows[i];
448     found = PETSC_FALSE;
449     for (j=0; j<size; j++) {
450       if (idx >= owners[j] && idx < owners[j+1]) {
451         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
452       }
453     }
454     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
455   }
456   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
457 
458   /* inform other processors of number of messages and max length*/
459   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
463   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
464   for (i=0; i<nrecvs; i++) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues for stuff going to
470          the ith processor
471   */
472   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
474   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
475   starts[0] = 0;
476   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
477   for (i=0; i<N; i++) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
481 
482   starts[0] = 0;
483   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
484   count = 0;
485   for (i=0; i<size; i++) {
486     if (nprocs[2*i+1]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]    = n;
504     slen          += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
511   count = 0;
512   for (i=0; i<nrecvs; i++) {
513     values = rvalues + i*nmax;
514     for (j=0; j<lens[i]; j++) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PetscLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal we use separate
530      code so that MatSetValues() is not called for each diagonal entry,
531      which would allocate new memory each time, triggering many mallocs and slowing things down.
532 
533        Contributed by: Matthew Knepley
534   */
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for (i = 0; i < slen; i++) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
560     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatMult_MPIAIJ"
571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
574   PetscErrorCode ierr;
575   int        nt;
576 
577   PetscFunctionBegin;
578   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
579   if (nt != A->n) {
580     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
581   }
582   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
584   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMultAdd_MPIAIJ"
591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
594   PetscErrorCode ierr;
595 
596   PetscFunctionBegin;
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609   PetscErrorCode ierr;
610 
611   PetscFunctionBegin;
612   /* do nondiagonal part */
613   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
614   /* send it on its way */
615   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616   /* do local part */
617   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
618   /* receive remote parts: note this assumes the values are not actually */
619   /* inserted in yy until the next line, which is true for my implementation */
620   /* but is perhaps not always true. */
621   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
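/*
   In MatMultTranspose_MPIAIJ() above, with A_d the local diagonal block (a->A)
   and B_o the local off-diagonal block (a->B), each process computes its owned
   contribution

       yy_local = A_d^T * xx_local

   while B_o^T * xx_local is scattered in reverse mode and ADDed into the owning
   processes' portions of yy; starting that scatter before the local multiply
   overlaps the communication with the computation.
*/
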
625 EXTERN_C_BEGIN
626 #undef __FUNCT__
627 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
629 {
630   MPI_Comm comm;
631   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
632   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
633   IS         Me,Notme;
634   PetscErrorCode ierr;
635   int        M,N,first,last,*notme,ntids,i;
636 
637   PetscFunctionBegin;
638 
639   /* Easy test: symmetric diagonal block */
640   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
641   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
642   if (!*f) PetscFunctionReturn(0);
643   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
644   ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
645   if (ntids==1) PetscFunctionReturn(0);
646 
647   /* Hard test: off-diagonal block. This takes a call to MatGetSubMatrices(). */
648   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
649   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
650   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
651   for (i=0; i<first; i++) notme[i] = i;
652   for (i=last; i<M; i++) notme[i-last+first] = i;
653   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
654   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
655   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
656   Aoff = Aoffs[0];
657   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
658   Boff = Boffs[0];
659   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
660   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
661   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
662   ierr = ISDestroy(Me);CHKERRQ(ierr);
663   ierr = ISDestroy(Notme);CHKERRQ(ierr);
664 
665   PetscFunctionReturn(0);
666 }
667 EXTERN_C_END
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
672 {
673   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
674   PetscErrorCode ierr;
675 
676   PetscFunctionBegin;
677   /* do nondiagonal part */
678   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
679   /* send it on its way */
680   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
681   /* do local part */
682   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
683   /* receive remote parts: note this assumes the values are not actually */
684   /* inserted in zz until the next line, which is true for my implementation */
685   /* but is perhaps not always true. */
686   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 /*
691   This only works correctly for square matrices where the subblock A->A is the
692    diagonal block
693 */
694 #undef __FUNCT__
695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
697 {
698   PetscErrorCode ierr;
699   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
700 
701   PetscFunctionBegin;
702   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
703   if (a->rstart != a->cstart || a->rend != a->cend) {
704     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
705   }
706   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatScale_MPIAIJ"
712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
713 {
714   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
719   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatDestroy_MPIAIJ"
725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
726 {
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731 #if defined(PETSC_USE_LOG)
732   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
733 #endif
734   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
735   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
736   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
737   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
738 #if defined (PETSC_USE_CTABLE)
739   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
740 #else
741   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
742 #endif
743   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
744   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
745   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
746   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
747   ierr = PetscFree(aij);CHKERRQ(ierr);
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatView_MPIAIJ_Binary"
753 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
754 {
755   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
756   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
757   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
758   PetscErrorCode    ierr;
759   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
760   int               nz,fd,header[4],*row_lengths,*range,rlen,i;
761   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
762   PetscScalar       *column_values;
763 
764   PetscFunctionBegin;
765   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
766   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
767   nz   = A->nz + B->nz;
768   if (!rank) {
769     header[0] = MAT_FILE_COOKIE;
770     header[1] = mat->M;
771     header[2] = mat->N;
772     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
774     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
775     /* get largest number of rows any processor has */
776     rlen = mat->m;
777     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
778     for (i=1; i<size; i++) {
779       rlen = PetscMax(rlen,range[i+1] - range[i]);
780     }
781   } else {
782     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
783     rlen = mat->m;
784   }
785 
786   /* load up the local row counts */
787   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
788   for (i=0; i<mat->m; i++) {
789     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
790   }
791 
792   /* store the row lengths to the file */
793   if (!rank) {
794     MPI_Status status;
795     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
796     for (i=1; i<size; i++) {
797       rlen = range[i+1] - range[i];
798       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
799       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
800     }
801   } else {
802     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
803   }
804   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
805 
806   /* load up the local column indices */
807   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
808   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
809   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
810   cnt  = 0;
811   for (i=0; i<mat->m; i++) {
812     for (j=B->i[i]; j<B->i[i+1]; j++) {
813       if ( (col = garray[B->j[j]]) > cstart) break;
814       column_indices[cnt++] = col;
815     }
816     for (k=A->i[i]; k<A->i[i+1]; k++) {
817       column_indices[cnt++] = A->j[k] + cstart;
818     }
819     for (; j<B->i[i+1]; j++) {
820       column_indices[cnt++] = garray[B->j[j]];
821     }
822   }
823   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
824 
825   /* store the column indices to the file */
826   if (!rank) {
827     MPI_Status status;
828     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
829     for (i=1; i<size; i++) {
830       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
831       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
832       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
833       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
834     }
835   } else {
836     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
837     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
838   }
839   ierr = PetscFree(column_indices);CHKERRQ(ierr);
840 
841   /* load up the local column values */
842   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
843   cnt  = 0;
844   for (i=0; i<mat->m; i++) {
845     for (j=B->i[i]; j<B->i[i+1]; j++) {
846       if ( garray[B->j[j]] > cstart) break;
847       column_values[cnt++] = B->a[j];
848     }
849     for (k=A->i[i]; k<A->i[i+1]; k++) {
850       column_values[cnt++] = A->a[k];
851     }
852     for (; j<B->i[i+1]; j++) {
853       column_values[cnt++] = B->a[j];
854     }
855   }
856   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
857 
858   /* store the column values to the file */
859   if (!rank) {
860     MPI_Status status;
861     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
862     for (i=1; i<size; i++) {
863       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
864       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
865       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
866       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
867     }
868   } else {
869     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
870     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
871   }
872   ierr = PetscFree(column_values);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
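/*
   Layout of the binary file written by MatView_MPIAIJ_Binary() above (all data
   is funneled through and written by process 0, in this order):

     int         header[4]   MAT_FILE_COOKIE, M, N, global number of nonzeros
     int         rowlen[M]   number of nonzeros in each global row
     int         col[nnz]    column indices, row by row, with the diagonal-block
                             indices shifted to global numbering (cstart, garray)
     PetscScalar val[nnz]    the matching numerical values
*/
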
876 #undef __FUNCT__
877 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
878 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
879 {
880   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
881   PetscErrorCode    ierr;
882   PetscMPIInt       rank = aij->rank,size = aij->size;
883   PetscTruth        isdraw,iascii,flg,isbinary;
884   PetscViewer       sviewer;
885   PetscViewerFormat format;
886 
887   PetscFunctionBegin;
888   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
889   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
890   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
891   if (iascii) {
892     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
893     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
894       MatInfo info;
895       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
896       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
898       if (flg) {
899         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
900 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
901       } else {
902         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
903 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
904       }
905       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
906       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
907       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
908       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
909       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
910       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_INFO) {
913       PetscFunctionReturn(0);
914     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915       PetscFunctionReturn(0);
916     }
917   } else if (isbinary) {
918     if (size == 1) {
919       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
920       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
921     } else {
922       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
923     }
924     PetscFunctionReturn(0);
925   } else if (isdraw) {
926     PetscDraw  draw;
927     PetscTruth isnull;
928     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
929     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
930   }
931 
932   if (size == 1) {
933     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
934     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935   } else {
936     /* assemble the entire matrix onto first processor. */
937     Mat         A;
938     Mat_SeqAIJ *Aloc;
939     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
940     PetscScalar *a;
941 
942     if (!rank) {
943       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
944     } else {
945       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
946     }
947     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
948     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
949     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
950     PetscLogObjectParent(mat,A);
951 
952     /* copy over the A part */
953     Aloc = (Mat_SeqAIJ*)aij->A->data;
954     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955     row = aij->rstart;
956     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
957     for (i=0; i<m; i++) {
958       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
959       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
960     }
961     aj = Aloc->j;
962     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
963 
964     /* copy over the B part */
965     Aloc = (Mat_SeqAIJ*)aij->B->data;
966     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
967     row  = aij->rstart;
968     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
969     ct   = cols;
970     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
971     for (i=0; i<m; i++) {
972       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
973       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
974     }
975     ierr = PetscFree(ct);CHKERRQ(ierr);
976     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     /*
979        Everyone has to participate in drawing the matrix since the graphics waits are
980        synchronized across all processors that share the PetscDraw object
981     */
982     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
983     if (!rank) {
984       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
985       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
986     }
987     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
988     ierr = MatDestroy(A);CHKERRQ(ierr);
989   }
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatView_MPIAIJ"
995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
996 {
997   PetscErrorCode ierr;
998   PetscTruth iascii,isdraw,issocket,isbinary;
999 
1000   PetscFunctionBegin;
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1005   if (iascii || isdraw || isbinary || issocket) {
1006     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1007   } else {
1008     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatRelax_MPIAIJ"
1017 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1018 {
1019   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1020   PetscErrorCode ierr;
1021   Vec          bb1;
1022   PetscScalar  mone=-1.0;
1023 
1024   PetscFunctionBegin;
1025   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1026 
1027   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1028 
1029   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1030     if (flag & SOR_ZERO_INITIAL_GUESS) {
1031       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1032       its--;
1033     }
1034 
1035     while (its--) {
1036       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1037       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1038 
1039       /* update rhs: bb1 = bb - B*x */
1040       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1041       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1042 
1043       /* local sweep */
1044       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1045       CHKERRQ(ierr);
1046     }
1047   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1048     if (flag & SOR_ZERO_INITIAL_GUESS) {
1049       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1050       its--;
1051     }
1052     while (its--) {
1053       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1055 
1056       /* update rhs: bb1 = bb - B*x */
1057       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1058       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1059 
1060       /* local sweep */
1061       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1062       CHKERRQ(ierr);
1063     }
1064   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1065     if (flag & SOR_ZERO_INITIAL_GUESS) {
1066       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1067       its--;
1068     }
1069     while (its--) {
1070       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1072 
1073       /* update rhs: bb1 = bb - B*x */
1074       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1075       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1076 
1077       /* local sweep */
1078       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1079       CHKERRQ(ierr);
1080     }
1081   } else {
1082     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1083   }
1084 
1085   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1091 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1092 {
1093   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1094   Mat        A = mat->A,B = mat->B;
1095   PetscErrorCode ierr;
1096   PetscReal  isend[5],irecv[5];
1097 
1098   PetscFunctionBegin;
1099   info->block_size     = 1.0;
1100   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1101   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1102   isend[3] = info->memory;  isend[4] = info->mallocs;
1103   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1104   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1105   isend[3] += info->memory;  isend[4] += info->mallocs;
1106   if (flag == MAT_LOCAL) {
1107     info->nz_used      = isend[0];
1108     info->nz_allocated = isend[1];
1109     info->nz_unneeded  = isend[2];
1110     info->memory       = isend[3];
1111     info->mallocs      = isend[4];
1112   } else if (flag == MAT_GLOBAL_MAX) {
1113     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1114     info->nz_used      = irecv[0];
1115     info->nz_allocated = irecv[1];
1116     info->nz_unneeded  = irecv[2];
1117     info->memory       = irecv[3];
1118     info->mallocs      = irecv[4];
1119   } else if (flag == MAT_GLOBAL_SUM) {
1120     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1121     info->nz_used      = irecv[0];
1122     info->nz_allocated = irecv[1];
1123     info->nz_unneeded  = irecv[2];
1124     info->memory       = irecv[3];
1125     info->mallocs      = irecv[4];
1126   }
1127   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1128   info->fill_ratio_needed = 0;
1129   info->factor_mallocs    = 0;
1130   info->rows_global       = (double)matin->M;
1131   info->columns_global    = (double)matin->N;
1132   info->rows_local        = (double)matin->m;
1133   info->columns_local     = (double)matin->N;
1134 
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatSetOption_MPIAIJ"
1140 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1141 {
1142   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1143   PetscErrorCode ierr;
1144 
1145   PetscFunctionBegin;
1146   switch (op) {
1147   case MAT_NO_NEW_NONZERO_LOCATIONS:
1148   case MAT_YES_NEW_NONZERO_LOCATIONS:
1149   case MAT_COLUMNS_UNSORTED:
1150   case MAT_COLUMNS_SORTED:
1151   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1152   case MAT_KEEP_ZEROED_ROWS:
1153   case MAT_NEW_NONZERO_LOCATION_ERR:
1154   case MAT_USE_INODES:
1155   case MAT_DO_NOT_USE_INODES:
1156   case MAT_IGNORE_ZERO_ENTRIES:
1157     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1158     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1159     break;
1160   case MAT_ROW_ORIENTED:
1161     a->roworiented = PETSC_TRUE;
1162     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1163     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1164     break;
1165   case MAT_ROWS_SORTED:
1166   case MAT_ROWS_UNSORTED:
1167   case MAT_YES_NEW_DIAGONALS:
1168     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1169     break;
1170   case MAT_COLUMN_ORIENTED:
1171     a->roworiented = PETSC_FALSE;
1172     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1173     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1174     break;
1175   case MAT_IGNORE_OFF_PROC_ENTRIES:
1176     a->donotstash = PETSC_TRUE;
1177     break;
1178   case MAT_NO_NEW_DIAGONALS:
1179     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1180   case MAT_SYMMETRIC:
1181   case MAT_STRUCTURALLY_SYMMETRIC:
1182   case MAT_HERMITIAN:
1183   case MAT_SYMMETRY_ETERNAL:
1184     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1185     break;
1186   case MAT_NOT_SYMMETRIC:
1187   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1188   case MAT_NOT_HERMITIAN:
1189   case MAT_NOT_SYMMETRY_ETERNAL:
1190     break;
1191   default:
1192     SETERRQ(PETSC_ERR_SUP,"unknown option");
1193   }
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatGetRow_MPIAIJ"
1199 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1200 {
1201   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1202   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1203   PetscErrorCode ierr;
1204   int          i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1205   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1206   int          *cmap,*idx_p;
1207 
1208   PetscFunctionBegin;
1209   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1210   mat->getrowactive = PETSC_TRUE;
1211 
1212   if (!mat->rowvalues && (idx || v)) {
1213     /*
1214         allocate enough space to hold information from the longest row.
1215     */
1216     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1217     int     max = 1,tmp;
1218     for (i=0; i<matin->m; i++) {
1219       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1220       if (max < tmp) { max = tmp; }
1221     }
1222     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1223     mat->rowindices = (int*)(mat->rowvalues + max);
1224   }
1225 
1226   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1227   lrow = row - rstart;
1228 
1229   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1230   if (!v)   {pvA = 0; pvB = 0;}
1231   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1232   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1233   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1234   nztot = nzA + nzB;
1235 
1236   cmap  = mat->garray;
1237   if (v  || idx) {
1238     if (nztot) {
1239       /* Sort by increasing column numbers, assuming A and B already sorted */
1240       int imark = -1;
1241       if (v) {
1242         *v = v_p = mat->rowvalues;
1243         for (i=0; i<nzB; i++) {
1244           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1245           else break;
1246         }
1247         imark = i;
1248         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1249         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1250       }
1251       if (idx) {
1252         *idx = idx_p = mat->rowindices;
1253         if (imark > -1) {
1254           for (i=0; i<imark; i++) {
1255             idx_p[i] = cmap[cworkB[i]];
1256           }
1257         } else {
1258           for (i=0; i<nzB; i++) {
1259             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1260             else break;
1261           }
1262           imark = i;
1263         }
1264         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1265         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1266       }
1267     } else {
1268       if (idx) *idx = 0;
1269       if (v)   *v   = 0;
1270     }
1271   }
1272   *nz = nztot;
1273   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1274   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1280 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1281 {
1282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1283 
1284   PetscFunctionBegin;
1285   if (aij->getrowactive == PETSC_FALSE) {
1286     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1287   }
1288   aij->getrowactive = PETSC_FALSE;
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatNorm_MPIAIJ"
1294 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1295 {
1296   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1297   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1298   PetscErrorCode ierr;
1299   int          i,j,cstart = aij->cstart;
1300   PetscReal    sum = 0.0;
1301   PetscScalar  *v;
1302 
1303   PetscFunctionBegin;
1304   if (aij->size == 1) {
1305     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1306   } else {
1307     if (type == NORM_FROBENIUS) {
1308       v = amat->a;
1309       for (i=0; i<amat->nz; i++) {
1310 #if defined(PETSC_USE_COMPLEX)
1311         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1312 #else
1313         sum += (*v)*(*v); v++;
1314 #endif
1315       }
1316       v = bmat->a;
1317       for (i=0; i<bmat->nz; i++) {
1318 #if defined(PETSC_USE_COMPLEX)
1319         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1320 #else
1321         sum += (*v)*(*v); v++;
1322 #endif
1323       }
1324       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1325       *norm = sqrt(*norm);
1326     } else if (type == NORM_1) { /* max column norm */
1327       PetscReal *tmp,*tmp2;
1328       int    *jj,*garray = aij->garray;
1329       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1330       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1331       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1332       *norm = 0.0;
1333       v = amat->a; jj = amat->j;
1334       for (j=0; j<amat->nz; j++) {
1335         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1336       }
1337       v = bmat->a; jj = bmat->j;
1338       for (j=0; j<bmat->nz; j++) {
1339         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1340       }
1341       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1342       for (j=0; j<mat->N; j++) {
1343         if (tmp2[j] > *norm) *norm = tmp2[j];
1344       }
1345       ierr = PetscFree(tmp);CHKERRQ(ierr);
1346       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1347     } else if (type == NORM_INFINITY) { /* max row norm */
1348       PetscReal ntemp = 0.0;
1349       for (j=0; j<aij->A->m; j++) {
1350         v = amat->a + amat->i[j];
1351         sum = 0.0;
1352         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1353           sum += PetscAbsScalar(*v); v++;
1354         }
1355         v = bmat->a + bmat->i[j];
1356         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1357           sum += PetscAbsScalar(*v); v++;
1358         }
1359         if (sum > ntemp) ntemp = sum;
1360       }
1361       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1362     } else {
1363       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1364     }
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatTranspose_MPIAIJ"
1371 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1372 {
1373   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1374   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1375   PetscErrorCode ierr;
1376   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1377   Mat          B;
1378   PetscScalar  *array;
1379 
1380   PetscFunctionBegin;
1381   if (!matout && M != N) {
1382     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1383   }
1384 
1385   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1386   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1387   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1388 
1389   /* copy over the A part */
1390   Aloc = (Mat_SeqAIJ*)a->A->data;
1391   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1392   row = a->rstart;
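  /* the column indices of the diagonal block are stored in local numbering, so they
     are shifted by cstart to global numbering before insertion and shifted back below */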
1393   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1394   for (i=0; i<m; i++) {
1395     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1396     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1397   }
1398   aj = Aloc->j;
1399   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1400 
1401   /* copy over the B part */
1402   Aloc = (Mat_SeqAIJ*)a->B->data;
1403   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1404   row  = a->rstart;
1405   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1406   ct   = cols;
1407   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1408   for (i=0; i<m; i++) {
1409     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1410     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1411   }
1412   ierr = PetscFree(ct);CHKERRQ(ierr);
1413   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1414   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1415   if (matout) {
1416     *matout = B;
1417   } else {
1418     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1419   }
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1425 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1426 {
1427   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1428   Mat        a = aij->A,b = aij->B;
1429   PetscErrorCode ierr;
1430   int        s1,s2,s3;
1431 
1432   PetscFunctionBegin;
1433   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1434   if (rr) {
1435     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1436     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1437     /* Overlap communication with computation. */
1438     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1439   }
1440   if (ll) {
1441     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1442     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1443     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1444   }
1445   /* scale  the diagonal block */
1446   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1447 
1448   if (rr) {
1449     /* Do a scatter end and then right scale the off-diagonal block */
1450     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1451     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1452   }
1453 
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1460 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1461 {
1462   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1463   PetscErrorCode ierr;
1464 
1465   PetscFunctionBegin;
1466   if (!a->rank) {
1467     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1468   }
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1474 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1475 {
1476   PetscFunctionBegin;
1477   *bs = 1;
1478   PetscFunctionReturn(0);
1479 }
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1482 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1483 {
1484   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 #undef __FUNCT__
1493 #define __FUNCT__ "MatEqual_MPIAIJ"
1494 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1495 {
1496   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1497   Mat        a,b,c,d;
1498   PetscTruth flg;
1499   PetscErrorCode ierr;
1500 
1501   PetscFunctionBegin;
1502   a = matA->A; b = matA->B;
1503   c = matB->A; d = matB->B;
1504 
1505   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1506   if (flg == PETSC_TRUE) {
1507     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1508   }
1509   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatCopy_MPIAIJ"
1515 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1516 {
1517   PetscErrorCode ierr;
1518   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1519   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1520 
1521   PetscFunctionBegin;
1522   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1523   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1524     /* because of the column compression in the off-processor part of the matrix a->B,
1525        the number of columns in a->B and b->B may be different, hence we cannot call
1526        MatCopy() directly on the two parts. If need be, we can provide a more
1527        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1528        and then copying the submatrices */
1529     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1530   } else {
1531     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1532     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1539 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1540 {
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #include "petscblaslapack.h"
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatAXPY_MPIAIJ"
1551 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1552 {
1553   PetscErrorCode ierr;
1554   int            i;
1555   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1556   PetscBLASInt   bnz,one=1;
1557   Mat_SeqAIJ     *x,*y;
1558 
1559   PetscFunctionBegin;
1560   if (str == SAME_NONZERO_PATTERN) {
1561     x = (Mat_SeqAIJ *)xx->A->data;
1562     y = (Mat_SeqAIJ *)yy->A->data;
1563     bnz = (PetscBLASInt)x->nz;
1564     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1565     x = (Mat_SeqAIJ *)xx->B->data;
1566     y = (Mat_SeqAIJ *)yy->B->data;
1567     bnz = (PetscBLASInt)x->nz;
1568     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1569   } else if (str == SUBSET_NONZERO_PATTERN) {
1570     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1571 
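    /* off-diagonal block: build (or reuse) the xtoy map giving, for each stored entry
       of X's B part, its position in Y's B part, then accumulate a*X into Y entrywise */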
1572     x = (Mat_SeqAIJ *)xx->B->data;
1573     y = (Mat_SeqAIJ *)yy->B->data;
1574     if (y->xtoy && y->XtoY != xx->B) {
1575       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1576       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1577     }
1578     if (!y->xtoy) { /* get xtoy */
1579       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1580       y->XtoY = xx->B;
1581     }
1582     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1583   } else {
1584     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 /* -------------------------------------------------------------------*/
1590 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1591        MatGetRow_MPIAIJ,
1592        MatRestoreRow_MPIAIJ,
1593        MatMult_MPIAIJ,
1594 /* 4*/ MatMultAdd_MPIAIJ,
1595        MatMultTranspose_MPIAIJ,
1596        MatMultTransposeAdd_MPIAIJ,
1597        0,
1598        0,
1599        0,
1600 /*10*/ 0,
1601        0,
1602        0,
1603        MatRelax_MPIAIJ,
1604        MatTranspose_MPIAIJ,
1605 /*15*/ MatGetInfo_MPIAIJ,
1606        MatEqual_MPIAIJ,
1607        MatGetDiagonal_MPIAIJ,
1608        MatDiagonalScale_MPIAIJ,
1609        MatNorm_MPIAIJ,
1610 /*20*/ MatAssemblyBegin_MPIAIJ,
1611        MatAssemblyEnd_MPIAIJ,
1612        0,
1613        MatSetOption_MPIAIJ,
1614        MatZeroEntries_MPIAIJ,
1615 /*25*/ MatZeroRows_MPIAIJ,
1616 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1617        MatLUFactorSymbolic_MPIAIJ_TFS,
1618 #else
1619        0,
1620 #endif
1621        0,
1622        0,
1623        0,
1624 /*30*/ MatSetUpPreallocation_MPIAIJ,
1625        0,
1626        0,
1627        0,
1628        0,
1629 /*35*/ MatDuplicate_MPIAIJ,
1630        0,
1631        0,
1632        0,
1633        0,
1634 /*40*/ MatAXPY_MPIAIJ,
1635        MatGetSubMatrices_MPIAIJ,
1636        MatIncreaseOverlap_MPIAIJ,
1637        MatGetValues_MPIAIJ,
1638        MatCopy_MPIAIJ,
1639 /*45*/ MatPrintHelp_MPIAIJ,
1640        MatScale_MPIAIJ,
1641        0,
1642        0,
1643        0,
1644 /*50*/ MatGetBlockSize_MPIAIJ,
1645        0,
1646        0,
1647        0,
1648        0,
1649 /*55*/ MatFDColoringCreate_MPIAIJ,
1650        0,
1651        MatSetUnfactored_MPIAIJ,
1652        0,
1653        0,
1654 /*60*/ MatGetSubMatrix_MPIAIJ,
1655        MatDestroy_MPIAIJ,
1656        MatView_MPIAIJ,
1657        MatGetPetscMaps_Petsc,
1658        0,
1659 /*65*/ 0,
1660        0,
1661        0,
1662        0,
1663        0,
1664 /*70*/ 0,
1665        0,
1666        MatSetColoring_MPIAIJ,
1667        MatSetValuesAdic_MPIAIJ,
1668        MatSetValuesAdifor_MPIAIJ,
1669 /*75*/ 0,
1670        0,
1671        0,
1672        0,
1673        0,
1674 /*80*/ 0,
1675        0,
1676        0,
1677        0,
1678 /*84*/ MatLoad_MPIAIJ,
1679        0,
1680        0,
1681        0,
1682        0,
1683 /*89*/ 0,
1684        MatMatMult_MPIAIJ_MPIAIJ,
1685        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1686        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1687        MatPtAP_MPIAIJ_MPIAIJ,
1688 /*94*/ MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1689        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1690 };
1691 
1692 /* ----------------------------------------------------------------------------------------*/
1693 
1694 EXTERN_C_BEGIN
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1697 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1698 {
1699   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1700   PetscErrorCode ierr;
1701 
1702   PetscFunctionBegin;
1703   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1704   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 EXTERN_C_END
1708 
1709 EXTERN_C_BEGIN
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1712 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1713 {
1714   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1719   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 EXTERN_C_END
1723 
1724 #include "petscpc.h"
1725 EXTERN_C_BEGIN
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1728 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1729 {
1730   Mat_MPIAIJ   *b;
1731   PetscErrorCode ierr;
1732   int          i;
1733 
1734   PetscFunctionBegin;
1735   B->preallocated = PETSC_TRUE;
1736   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1737   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1738   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1739   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1740   if (d_nnz) {
1741     for (i=0; i<B->m; i++) {
1742       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1743     }
1744   }
1745   if (o_nnz) {
1746     for (i=0; i<B->m; i++) {
1747       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1748     }
1749   }
1750   b = (Mat_MPIAIJ*)B->data;
1751   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1752   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1753 
1754   PetscFunctionReturn(0);
1755 }
1756 EXTERN_C_END
1757 
1758 /*MC
1759    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1760 
1761    Options Database Keys:
1762 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1763 
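  A minimal usage sketch (comm, the sizes m,n,M,N and the fill estimates d_nz,o_nz are
  placeholders; error checking omitted):
.vb
     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
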
1764   Level: beginner
1765 
1766 .seealso: MatCreateMPIAIJ
1767 M*/
1768 
1769 EXTERN_C_BEGIN
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatCreate_MPIAIJ"
1772 PetscErrorCode MatCreate_MPIAIJ(Mat B)
1773 {
1774   Mat_MPIAIJ *b;
1775   PetscErrorCode ierr;
1776   int        i,size;
1777 
1778   PetscFunctionBegin;
1779   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1780 
1781   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1782   B->data         = (void*)b;
1783   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1784   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1785   B->factor       = 0;
1786   B->assembled    = PETSC_FALSE;
1787   B->mapping      = 0;
1788 
1789   B->insertmode      = NOT_SET_VALUES;
1790   b->size            = size;
1791   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1792 
1793   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1794   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1795 
1796   /* the information in the maps duplicates the information computed below, eventually
1797      we should remove the duplicate information that is not contained in the maps */
1798   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1799   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1800 
1801   /* build local table of row and column ownerships */
1802   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1803   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1804   b->cowners = b->rowners + b->size + 2;
1805   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
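  /* rowners[1..size] now holds every process's local row count; the running sum below
     turns these counts into ownership offsets, so this process owns global rows
     rowners[rank] through rowners[rank+1]-1 */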
1806   b->rowners[0] = 0;
1807   for (i=2; i<=b->size; i++) {
1808     b->rowners[i] += b->rowners[i-1];
1809   }
1810   b->rstart = b->rowners[b->rank];
1811   b->rend   = b->rowners[b->rank+1];
1812   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1813   b->cowners[0] = 0;
1814   for (i=2; i<=b->size; i++) {
1815     b->cowners[i] += b->cowners[i-1];
1816   }
1817   b->cstart = b->cowners[b->rank];
1818   b->cend   = b->cowners[b->rank+1];
1819 
1820   /* build cache for off array entries formed */
1821   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1822   b->donotstash  = PETSC_FALSE;
1823   b->colmap      = 0;
1824   b->garray      = 0;
1825   b->roworiented = PETSC_TRUE;
1826 
1827   /* stuff used for matrix vector multiply */
1828   b->lvec      = PETSC_NULL;
1829   b->Mvctx     = PETSC_NULL;
1830 
1831   /* stuff for MatGetRow() */
1832   b->rowindices   = 0;
1833   b->rowvalues    = 0;
1834   b->getrowactive = PETSC_FALSE;
1835 
1836   /* Explicitly create 2 MATSEQAIJ matrices. */
1837   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1838   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1839   PetscLogObjectParent(B,b->A);
1840   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1841   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1842   PetscLogObjectParent(B,b->B);
1843 
1844   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1845                                      "MatStoreValues_MPIAIJ",
1846                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1848                                      "MatRetrieveValues_MPIAIJ",
1849                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1851 				     "MatGetDiagonalBlock_MPIAIJ",
1852                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1854 				     "MatIsTranspose_MPIAIJ",
1855 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1857 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1858 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1859   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1860 				     "MatDiagonalScaleLocal_MPIAIJ",
1861 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1862   PetscFunctionReturn(0);
1863 }
1864 EXTERN_C_END
1865 
1866 /*MC
1867    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1868 
1869    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1870    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1871   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1872   for communicators controlling multiple processes.  It is recommended that you call both of
1873   the above preallocation routines for simplicity.
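
  A sketch of that pattern (comm, the sizes and the fill estimates d_nz,o_nz are
  placeholders; error checking omitted):
.vb
     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);
     MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve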
1874 
1875    Options Database Keys:
1876 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1877 
1878   Level: beginner
1879 
1880 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1881 M*/
1882 
1883 EXTERN_C_BEGIN
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatCreate_AIJ"
1886 PetscErrorCode MatCreate_AIJ(Mat A)
1887 {
1888   PetscErrorCode ierr;
1889   int size;
1890 
1891   PetscFunctionBegin;
1892   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1893   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1894   if (size == 1) {
1895     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1896   } else {
1897     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 EXTERN_C_END
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1905 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1906 {
1907   Mat        mat;
1908   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1909   PetscErrorCode ierr;
1910 
1911   PetscFunctionBegin;
1912   *newmat       = 0;
1913   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1914   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1915   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1916   a    = (Mat_MPIAIJ*)mat->data;
1917 
1918   mat->factor       = matin->factor;
1919   mat->assembled    = PETSC_TRUE;
1920   mat->insertmode   = NOT_SET_VALUES;
1921   mat->preallocated = PETSC_TRUE;
1922 
1923   a->rstart       = oldmat->rstart;
1924   a->rend         = oldmat->rend;
1925   a->cstart       = oldmat->cstart;
1926   a->cend         = oldmat->cend;
1927   a->size         = oldmat->size;
1928   a->rank         = oldmat->rank;
1929   a->donotstash   = oldmat->donotstash;
1930   a->roworiented  = oldmat->roworiented;
1931   a->rowindices   = 0;
1932   a->rowvalues    = 0;
1933   a->getrowactive = PETSC_FALSE;
1934 
1935   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1936   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1937   if (oldmat->colmap) {
1938 #if defined (PETSC_USE_CTABLE)
1939     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1940 #else
1941     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1942     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1943     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1944 #endif
1945   } else a->colmap = 0;
1946   if (oldmat->garray) {
1947     int len;
1948     len  = oldmat->B->n;
1949     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1950     PetscLogObjectMemory(mat,len*sizeof(int));
1951     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1952   } else a->garray = 0;
1953 
1954   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1955   PetscLogObjectParent(mat,a->lvec);
1956   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1957   PetscLogObjectParent(mat,a->Mvctx);
1958   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1959   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1960   PetscLogObjectParent(mat,a->A);
1961   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1962   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1963   PetscLogObjectParent(mat,a->B);
1964   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1965   *newmat = mat;
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #include "petscsys.h"
1970 
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatLoad_MPIAIJ"
1973 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1974 {
1975   Mat            A;
1976   PetscScalar    *vals,*svals;
1977   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1978   MPI_Status     status;
1979   PetscErrorCode ierr;
1980   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1981   int            i,nz,j,rstart,rend,fd;
1982   int            header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1983   int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1984   int            cend,cstart,n;
1985 
1986   PetscFunctionBegin;
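  /* Overview: process 0 reads the header and the row lengths, scatters each process
     its local row lengths, then reads the column indices and numerical values in
     per-process chunks and ships them to their owners; every process inserts its
     rows with MatSetValues() and the matrix is assembled at the end */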
1987   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1988   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1989   if (!rank) {
1990     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1991     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1992     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1993     if (header[3] < 0) {
1994       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1995     }
1996   }
1997 
1998   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1999   M = header[1]; N = header[2];
2000   /* determine ownership of all rows */
2001   m = M/size + ((M % size) > rank);
2002   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2003   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2004   rowners[0] = 0;
2005   for (i=2; i<=size; i++) {
2006     rowners[i] += rowners[i-1];
2007   }
2008   rstart = rowners[rank];
2009   rend   = rowners[rank+1];
2010 
2011   /* distribute row lengths to all processors */
2012   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
2013   offlens = ourlens + (rend-rstart);
2014   if (!rank) {
2015     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
2016     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2017     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2018     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2019     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2020     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2021   } else {
2022     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2023   }
2024 
2025   if (!rank) {
2026     /* calculate the number of nonzeros on each processor */
2027     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2028     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2029     for (i=0; i<size; i++) {
2030       for (j=rowners[i]; j< rowners[i+1]; j++) {
2031         procsnz[i] += rowlengths[j];
2032       }
2033     }
2034     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2035 
2036     /* determine max buffer needed and allocate it */
2037     maxnz = 0;
2038     for (i=0; i<size; i++) {
2039       maxnz = PetscMax(maxnz,procsnz[i]);
2040     }
2041     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2042 
2043     /* read in my part of the matrix column indices  */
2044     nz   = procsnz[0];
2045     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2046     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2047 
2048     /* read in everyone else's part and ship it off */
2049     for (i=1; i<size; i++) {
2050       nz   = procsnz[i];
2051       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2052       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2053     }
2054     ierr = PetscFree(cols);CHKERRQ(ierr);
2055   } else {
2056     /* determine buffer space needed for message */
2057     nz = 0;
2058     for (i=0; i<m; i++) {
2059       nz += ourlens[i];
2060     }
2061     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2062 
2063     /* receive message of column indices*/
2064     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2065     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2066     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2067   }
2068 
2069   /* determine column ownership if matrix is not square */
2070   if (N != M) {
2071     n      = N/size + ((N % size) > rank);
2072     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2073     cstart = cend - n;
2074   } else {
2075     cstart = rstart;
2076     cend   = rend;
2077     n      = cend - cstart;
2078   }
2079 
2080   /* loop over local rows, determining number of off diagonal entries */
2081   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2082   jj = 0;
2083   for (i=0; i<m; i++) {
2084     for (j=0; j<ourlens[i]; j++) {
2085       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2086       jj++;
2087     }
2088   }
2089 
2090   /* create our matrix */
2091   for (i=0; i<m; i++) {
2092     ourlens[i] -= offlens[i];
2093   }
2094   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2095   ierr = MatSetType(A,type);CHKERRQ(ierr);
2096   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2097 
2098   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2099   for (i=0; i<m; i++) {
2100     ourlens[i] += offlens[i];
2101   }
2102 
2103   if (!rank) {
2104     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2105 
2106     /* read in my part of the matrix numerical values  */
2107     nz   = procsnz[0];
2108     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2109 
2110     /* insert into matrix */
2111     jj      = rstart;
2112     smycols = mycols;
2113     svals   = vals;
2114     for (i=0; i<m; i++) {
2115       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2116       smycols += ourlens[i];
2117       svals   += ourlens[i];
2118       jj++;
2119     }
2120 
2121     /* read in other processors and ship out */
2122     for (i=1; i<size; i++) {
2123       nz   = procsnz[i];
2124       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2125       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2126     }
2127     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2128   } else {
2129     /* receive numeric values */
2130     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2131 
2132     /* receive message of values*/
2133     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2134     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2135     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2136 
2137     /* insert into matrix */
2138     jj      = rstart;
2139     smycols = mycols;
2140     svals   = vals;
2141     for (i=0; i<m; i++) {
2142       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2143       smycols += ourlens[i];
2144       svals   += ourlens[i];
2145       jj++;
2146     }
2147   }
2148   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2149   ierr = PetscFree(vals);CHKERRQ(ierr);
2150   ierr = PetscFree(mycols);CHKERRQ(ierr);
2151   ierr = PetscFree(rowners);CHKERRQ(ierr);
2152 
2153   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2154   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2155   *newmat = A;
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 #undef __FUNCT__
2160 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2161 /*
2162     Not great, since it makes two copies of the submatrix: first a SeqAIJ
2163   matrix on each process, and then the final result obtained by concatenating
2164   the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2165 */
2166 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2167 {
2168   PetscErrorCode ierr;
2169   PetscMPIInt    rank,size;
2170   int            i,m,n,rstart,row,rend,nz,*cwork,j;
2171   int            *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2172   Mat            *local,M,Mreuse;
2173   PetscScalar    *vwork,*aa;
2174   MPI_Comm       comm = mat->comm;
2175   Mat_SeqAIJ     *aij;
2176 
2177 
2178   PetscFunctionBegin;
2179   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2181 
2182   if (call ==  MAT_REUSE_MATRIX) {
2183     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2184     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2185     local = &Mreuse;
2186     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2187   } else {
2188     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2189     Mreuse = *local;
2190     ierr   = PetscFree(local);CHKERRQ(ierr);
2191   }
2192 
2193   /*
2194       m - number of local rows
2195       n - number of columns (same on all processors)
2196       rstart - first row in new global matrix generated
2197   */
2198   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2199   if (call == MAT_INITIAL_MATRIX) {
2200     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2201     ii  = aij->i;
2202     jj  = aij->j;
2203 
2204     /*
2205         Determine the number of non-zeros in the diagonal and off-diagonal
2206         portions of the matrix in order to do correct preallocation
2207     */
2208 
2209     /* first get start and end of "diagonal" columns */
2210     if (csize == PETSC_DECIDE) {
2211       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2212       if (mglobal == n) { /* square matrix */
2213 	nlocal = m;
2214       } else {
2215         nlocal = n/size + ((n % size) > rank);
2216       }
2217     } else {
2218       nlocal = csize;
2219     }
2220     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2221     rstart = rend - nlocal;
2222     if (rank == size - 1 && rend != n) {
2223       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2224     }
2225 
2226     /* next, compute all the lengths */
2227     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2228     olens = dlens + m;
2229     for (i=0; i<m; i++) {
2230       jend = ii[i+1] - ii[i];
2231       olen = 0;
2232       dlen = 0;
2233       for (j=0; j<jend; j++) {
2234         if (*jj < rstart || *jj >= rend) olen++;
2235         else dlen++;
2236         jj++;
2237       }
2238       olens[i] = olen;
2239       dlens[i] = dlen;
2240     }
2241     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2242     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2243     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2244     ierr = PetscFree(dlens);CHKERRQ(ierr);
2245   } else {
2246     int ml,nl;
2247 
2248     M = *newmat;
2249     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2250     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2251     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2252     /*
2253          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2254        rather than the slower MatSetValues().
2255     */
2256     M->was_assembled = PETSC_TRUE;
2257     M->assembled     = PETSC_FALSE;
2258   }
2259   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2260   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2261   ii  = aij->i;
2262   jj  = aij->j;
2263   aa  = aij->a;
2264   for (i=0; i<m; i++) {
2265     row   = rstart + i;
2266     nz    = ii[i+1] - ii[i];
2267     cwork = jj;     jj += nz;
2268     vwork = aa;     aa += nz;
2269     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2270   }
2271 
2272   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274   *newmat = M;
2275 
2276   /* save submatrix used in processor for next request */
2277   if (call ==  MAT_INITIAL_MATRIX) {
2278     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2279     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2280   }
2281 
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 #undef __FUNCT__
2286 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2287 /*@C
2288    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2289    (the default parallel PETSc format).  For good matrix assembly performance
2290    the user should preallocate the matrix storage by setting the parameters
2291    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2292    performance can be increased by more than a factor of 50.
2293 
2294    Collective on MPI_Comm
2295 
2296    Input Parameters:
2297 +  A - the matrix
2298 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2299            (same value is used for all local rows)
2300 .  d_nnz - array containing the number of nonzeros in the various rows of the
2301            DIAGONAL portion of the local submatrix (possibly different for each row)
2302            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2303            The size of this array is equal to the number of local rows, i.e. 'm'.
2304            You must leave room for the diagonal entry even if it is zero.
2305 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2306            submatrix (same value is used for all local rows).
2307 -  o_nnz - array containing the number of nonzeros in the various rows of the
2308            OFF-DIAGONAL portion of the local submatrix (possibly different for
2309            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2310            structure. The size of this array is equal to the number
2311            of local rows, i.e. 'm'.
2312 
2313    The AIJ format (also called the Yale sparse matrix format or
2314    compressed row storage) is fully compatible with standard Fortran 77
2315    storage.  That is, the stored row and column indices can begin at
2316    either one (as in Fortran) or zero.  See the users manual for details.
2317 
2318    The user MUST specify either the local or global matrix dimensions
2319    (possibly both).
2320 
2321    The parallel matrix is partitioned such that the first m0 rows belong to
2322    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2323    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2324 
2325    The DIAGONAL portion of the local submatrix of a processor can be defined
2326    as the submatrix which is obtained by extracting the part corresponding
2327    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2328    first row that belongs to the processor, and r2 is the last row belonging
2329    to this processor. This is a square mxm matrix. The remaining portion
2330    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2331 
2332    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2333 
2334    By default, this format uses inodes (identical nodes) when possible.
2335    We search for consecutive rows with the same nonzero structure, thereby
2336    reusing matrix information to achieve increased efficiency.
2337 
2338    Options Database Keys:
2339 +  -mat_aij_no_inode  - Do not use inodes
2340 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2341 -  -mat_aij_oneindex - Internally use indexing starting at 1
2342         rather than 0.  Note that when calling MatSetValues(),
2343         the user still MUST index entries starting at 0!
2344 
2345    Example usage:
2346 
2347    Consider the following 8x8 matrix with 34 non-zero values that is
2348    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2349    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2350    as follows:
2351 
2352 .vb
2353             1  2  0  |  0  3  0  |  0  4
2354     Proc0   0  5  6  |  7  0  0  |  8  0
2355             9  0 10  | 11  0  0  | 12  0
2356     -------------------------------------
2357            13  0 14  | 15 16 17  |  0  0
2358     Proc1   0 18  0  | 19 20 21  |  0  0
2359             0  0  0  | 22 23  0  | 24  0
2360     -------------------------------------
2361     Proc2  25 26 27  |  0  0 28  | 29  0
2362            30  0  0  | 31 32 33  |  0 34
2363 .ve
2364 
2365    This can be represented as a collection of submatrices as:
2366 
2367 .vb
2368       A B C
2369       D E F
2370       G H I
2371 .ve
2372 
2373    Where the submatrices A,B,C are owned by proc0, D,E,F are
2374    owned by proc1, G,H,I are owned by proc2.
2375 
2376    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2377    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2378    The 'M','N' parameters are 8,8, and have the same values on all procs.
2379 
2380    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2381    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2382    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2383    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2384    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2385    matrix, and [DF] as another SeqAIJ matrix.
2386 
2387    When d_nz, o_nz parameters are specified, d_nz storage elements are
2388    allocated for every row of the local diagonal submatrix, and o_nz
2389    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2390    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2391    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2392    In this case, the values of d_nz,o_nz are:
2393 .vb
2394      proc0 : dnz = 2, o_nz = 2
2395      proc1 : dnz = 3, o_nz = 2
2396      proc2 : dnz = 1, o_nz = 4
2397 .ve
2398    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2399    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2400    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2401    34 values.
2402 
2403    When d_nnz, o_nnz parameters are specified, the storage is specified
2404    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2405    In the above case the values for d_nnz,o_nnz are:
2406 .vb
2407      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2408      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2409      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2410 .ve
2411    Here the space allocated is the sum of all the above values, i.e. 34, and
2412    hence pre-allocation is perfect.
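
   For instance, on proc1 the per-row preallocation above could be requested with a
   call like the following (a sketch; B is the already created mpiaij matrix):
.vb
     int d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve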
2413 
2414    Level: intermediate
2415 
2416 .keywords: matrix, aij, compressed row, sparse, parallel
2417 
2418 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2419 @*/
2420 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2421 {
2422   PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);
2423 
2424   PetscFunctionBegin;
2425   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2426   if (f) {
2427     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2428   }
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatCreateMPIAIJ"
2434 /*@C
2435    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2436    (the default parallel PETSc format).  For good matrix assembly performance
2437    the user should preallocate the matrix storage by setting the parameters
2438    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2439    performance can be increased by more than a factor of 50.
2440 
2441    Collective on MPI_Comm
2442 
2443    Input Parameters:
2444 +  comm - MPI communicator
2445 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2446            This value should be the same as the local size used in creating the
2447            y vector for the matrix-vector product y = Ax.
2448 .  n - This value should be the same as the local size used in creating the
2449        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2450        calculated if N is given) For square matrices n is almost always m.
2451 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2452 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2453 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2454            (same value is used for all local rows)
2455 .  d_nnz - array containing the number of nonzeros in the various rows of the
2456            DIAGONAL portion of the local submatrix (possibly different for each row)
2457            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2458            The size of this array is equal to the number of local rows, i.e. 'm'.
2459            You must leave room for the diagonal entry even if it is zero.
2460 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2461            submatrix (same value is used for all local rows).
2462 -  o_nnz - array containing the number of nonzeros in the various rows of the
2463            OFF-DIAGONAL portion of the local submatrix (possibly different for
2464            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2465            structure. The size of this array is equal to the number
2466            of local rows, i.e. 'm'.
2467 
2468    Output Parameter:
2469 .  A - the matrix
2470 
2471    Notes:
2472    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2473    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2474    storage requirements for this matrix.
2475 
2476    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2477    processor then it must be used on all processors that share the object for
2478    that argument.
2479 
2480    The AIJ format (also called the Yale sparse matrix format or
2481    compressed row storage) is fully compatible with standard Fortran 77
2482    storage.  That is, the stored row and column indices can begin at
2483    either one (as in Fortran) or zero.  See the users manual for details.
2484 
2485    The user MUST specify either the local or global matrix dimensions
2486    (possibly both).
2487 
2488    The parallel matrix is partitioned such that the first m0 rows belong to
2489    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2490    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2491 
2492    The DIAGONAL portion of the local submatrix of a processor can be defined
2493    as the submatrix which is obtained by extracting the part corresponding
2494    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2495    first row that belongs to the processor, and r2 is the last row belonging
2496    to this processor. This is a square mxm matrix. The remaining portion
2497    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2498 
2499    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2500 
2501    When calling this routine with a single process communicator, a matrix of
2502    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2503    type of communicator, use the construction mechanism:
2504      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2505 
2506    By default, this format uses inodes (identical nodes) when possible.
2507    We search for consecutive rows with the same nonzero structure, thereby
2508    reusing matrix information to achieve increased efficiency.
2509 
2510    Options Database Keys:
2511 +  -mat_aij_no_inode  - Do not use inodes
2512 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2513 -  -mat_aij_oneindex - Internally use indexing starting at 1
2514         rather than 0.  Note that when calling MatSetValues(),
2515         the user still MUST index entries starting at 0!
2516 
2517 
2518    Example usage:
2519 
2520    Consider the following 8x8 matrix with 34 non-zero values that is
2521    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2522    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2523    as follows:
2524 
2525 .vb
2526             1  2  0  |  0  3  0  |  0  4
2527     Proc0   0  5  6  |  7  0  0  |  8  0
2528             9  0 10  | 11  0  0  | 12  0
2529     -------------------------------------
2530            13  0 14  | 15 16 17  |  0  0
2531     Proc1   0 18  0  | 19 20 21  |  0  0
2532             0  0  0  | 22 23  0  | 24  0
2533     -------------------------------------
2534     Proc2  25 26 27  |  0  0 28  | 29  0
2535            30  0  0  | 31 32 33  |  0 34
2536 .ve
2537 
2538    This can be represented as a collection of submatrices as:
2539 
2540 .vb
2541       A B C
2542       D E F
2543       G H I
2544 .ve
2545 
2546    Where the submatrices A,B,C are owned by proc0, D,E,F are
2547    owned by proc1, G,H,I are owned by proc2.
2548 
2549    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2550    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2551    The 'M','N' parameters are 8,8, and have the same values on all procs.
2552 
2553    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2554    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2555    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2556    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2557    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2558    matrix, and [DF] as another SeqAIJ matrix.
2559 
2560    When d_nz, o_nz parameters are specified, d_nz storage elements are
2561    allocated for every row of the local diagonal submatrix, and o_nz
2562    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2563    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2564    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2565    In this case, the values of d_nz,o_nz are:
2566 .vb
2567      proc0 : dnz = 2, o_nz = 2
2568      proc1 : dnz = 3, o_nz = 2
2569      proc2 : dnz = 1, o_nz = 4
2570 .ve
2571    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2572    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2573    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2574    34 values.
2575 
2576    When d_nnz, o_nnz parameters are specified, the storage is specified
2577    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2578    In the above case the values for d_nnz,o_nnz are:
2579 .vb
2580      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2581      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2582      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2583 .ve
2584    Here the space allocated is the sum of all the above values, i.e. 34, and
2585    hence pre-allocation is perfect.
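
   With these arrays, proc1 could create its share of the matrix above with a call
   like the following (a sketch; error checking omitted):
.vb
     int d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve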
2586 
2587    Level: intermediate
2588 
2589 .keywords: matrix, aij, compressed row, sparse, parallel
2590 
2591 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2592 @*/
2593 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2594 {
2595   PetscErrorCode ierr;
2596   int size;
2597 
2598   PetscFunctionBegin;
2599   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2601   if (size > 1) {
2602     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2603     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2604   } else {
2605     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2606     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2607   }
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2613 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2614 {
2615   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2616   PetscFunctionBegin;
2617   *Ad     = a->A;
2618   *Ao     = a->B;
2619   *colmap = a->garray;
2620   PetscFunctionReturn(0);
2621 }
2622 
2623 #undef __FUNCT__
2624 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2625 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2626 {
2627   PetscErrorCode ierr;
2628   int        i;
2629   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2630 
2631   PetscFunctionBegin;
2632   if (coloring->ctype == IS_COLORING_LOCAL) {
2633     ISColoringValue *allcolors,*colors;
2634     ISColoring      ocoloring;
2635 
2636     /* set coloring for diagonal portion */
2637     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2638 
2639     /* set coloring for off-diagonal portion */
2640     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2641     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2642     for (i=0; i<a->B->n; i++) {
2643       colors[i] = allcolors[a->garray[i]];
2644     }
2645     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2646     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2647     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2648     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2649   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2650     ISColoringValue *colors;
2651     int             *larray;
2652     ISColoring      ocoloring;
2653 
2654     /* set coloring for diagonal portion */
2655     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2656     for (i=0; i<a->A->n; i++) {
2657       larray[i] = i + a->cstart;
2658     }
2659     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2660     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2661     for (i=0; i<a->A->n; i++) {
2662       colors[i] = coloring->colors[larray[i]];
2663     }
2664     ierr = PetscFree(larray);CHKERRQ(ierr);
2665     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2666     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2667     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2668 
2669     /* set coloring for off-diagonal portion */
2670     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2671     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2672     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2673     for (i=0; i<a->B->n; i++) {
2674       colors[i] = coloring->colors[larray[i]];
2675     }
2676     ierr = PetscFree(larray);CHKERRQ(ierr);
2677     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2678     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2679     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2680   } else {
2681     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2682   }
2683 
2684   PetscFunctionReturn(0);
2685 }
2686 
2687 #undef __FUNCT__
2688 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2689 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2690 {
2691   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2692   PetscErrorCode ierr;
2693 
2694   PetscFunctionBegin;
2695   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2696   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2697   PetscFunctionReturn(0);
2698 }
2699 
2700 #undef __FUNCT__
2701 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2702 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2703 {
2704   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2705   PetscErrorCode ierr;
2706 
2707   PetscFunctionBegin;
2708   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2709   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 #undef __FUNCT__
2714 #define __FUNCT__ "MatMerge"
2715 /*@C
2716       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2717                  matrices from each processor
2718 
2719     Collective on MPI_Comm
2720 
2721    Input Parameters:
2722 +    comm - the communicator the parallel matrix will live on
2723 .    inmat - the input sequential matrix on each process
2724 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2725 
2726    Output Parameter:
2727 .    outmat - the parallel matrix generated
2728 
2729     Level: advanced
2730 
2731    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2732 
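   Example usage (a sketch; each process passes its own sequential matrix, which is
   taken over and destroyed by the routine, as can be seen in the implementation below):
.vb
     Mat seqmat,merged;
     /* ... each process assembles its local rows into seqmat ... */
     MatMerge(PETSC_COMM_WORLD,seqmat,MAT_INITIAL_MATRIX,&merged);
.ve
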
2733 @*/
2734 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat)
2735 {
2736   PetscErrorCode ierr;
2737   int               m,n,i,rstart,nnz,I,*dnz,*onz;
2738   const int         *indx;
2739   const PetscScalar *values;
2740   PetscMap          columnmap,rowmap;
2741 
2742   PetscFunctionBegin;
2743 
2744   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2745   if (scall == MAT_INITIAL_MATRIX){
2746     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2747     ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2748     ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2749     ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2750     ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2751     ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2752 
2753     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2754     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2755     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2756     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2757     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2758 
2759     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2760     for (i=0;i<m;i++) {
2761       ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2762       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2763       ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2764     }
2765     /* This routine will ONLY return MPIAIJ type matrix */
2766     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2767     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2768     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2769     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2770 
2771   } else if (scall == MAT_REUSE_MATRIX){
2772     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2773   } else {
2774     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2775   }
2776 
2777   for (i=0;i<m;i++) {
2778     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2779     I    = i + rstart;
2780     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2781     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2782   }
2783   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2784   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2785   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2786 
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatFileSplit"
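/*
   MatFileSplit - Writes the locally owned rows of the parallel matrix A as a
   sequential matrix per process, each saved in PETSc binary format to the
   file <outfile>.<rank>.
*/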
2792 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2793 {
2794   PetscErrorCode    ierr;
2795   PetscMPIInt       rank;
2796   int               m,N,i,rstart,nnz;
2797   size_t            len;
2798   const int         *indx;
2799   PetscViewer       out;
2800   char              *name;
2801   Mat               B;
2802   const PetscScalar *values;
2803 
2804   PetscFunctionBegin;
2805 
2806   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2807   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2808   /* Should this be the type of the diagonal block of A? */
2809   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2810   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2811   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2812   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2813   for (i=0;i<m;i++) {
2814     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2815     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2816     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2817   }
2818   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2819   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2820 
2821   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2822   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2823   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* leave room for ".<rank>" even on large communicators */
2824   sprintf(name,"%s.%d",outfile,rank);
2825   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2826   ierr = PetscFree(name);CHKERRQ(ierr);
2827   ierr = MatView(B,out);CHKERRQ(ierr);
2828   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2829   ierr = MatDestroy(B);CHKERRQ(ierr);
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 typedef struct { /* used by MatMerge_SeqsToMPI for reusing the merged matrix */
2834   PetscMap  rowmap;
2835   int       nrecv,nsend,*id_r,*bi,*bj,**ijbuf_r; /* message counts, sender ranks, saved ij structure of B_seq, and received ij buffers */
2836   int       *len_sra; /* length 2*size: len_sra[proc] is the number of matrix values sent to
2837                          process proc; len_sra[size+k] is the number of values in the k-th received message */
2838   MatScalar *abuf_s;  /* send buffer for the matrix values */
2839 } Mat_Merge_SeqsToMPI;
2840 
2841 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2842 #undef __FUNCT__
2843 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2844 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2845 {
2846   PetscErrorCode ierr;
2847   Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr;
2848 
2849   PetscFunctionBegin;
2850   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2851   ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr);
2852   ierr = PetscFree(merge->len_sra);CHKERRQ(ierr);
2853   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2854   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2855   ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr);
2856   ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2857   ierr = PetscFree(merge);CHKERRQ(ierr);
2858 
2859   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2860   PetscFunctionReturn(0);
2861 }
2862 
2863 #include "src/mat/utils/freespace.h"
2864 #undef __FUNCT__
2865 #define __FUNCT__ "MatMerge_SeqsToMPI"
2866 /*@C
2867       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2868                  matrices from each processor
2869 
2870     Collective on MPI_Comm
2871 
2872    Input Parameters:
2873 +    comm - the communicator the parallel matrix will live on
2874 .    seqmat - the input sequential matrix (one per process)
2875 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2876 
2877    Output Parameter:
2878 .    mpimat - the parallel matrix generated
2879 
2880     Level: advanced
2881 
2882    Notes: The dimensions of the sequential matrix on each processor MUST be the same;
2883       entries contributed to the same location by different processors are added together.
2884 
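   Example usage (a sketch only; every process holds its own full-size sequential
   contribution seq, with the same row and column dimensions on all processes):

      Mat            seq,mpi;
      PetscErrorCode ierr;
      ...  each process fills and assembles its local SeqAIJ matrix seq  ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seq,MAT_INITIAL_MATRIX,&mpi);CHKERRQ(ierr);
      ...  refill seq with new values in the same nonzero pattern  ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seq,MAT_REUSE_MATRIX,&mpi);CHKERRQ(ierr);
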
2885 @*/
2886 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,MatReuse scall,Mat *mpimat)
2887 {
2888   PetscErrorCode    ierr;
2889   Mat               B_seq,B_mpi;
2890   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)seqmat->data;
2891   PetscMPIInt       size,rank;
2892   int               M=seqmat->m,N=seqmat->n,m,i,j,*owners,*ai=a->i,*aj=a->j,tag,taga,len,len_a,*len_s,*len_sa,proc,*len_r,*len_ra;
2893   int               **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj;
2894   MPI_Request       *s_waits,*r_waits,*s_waitsa,*r_waitsa;
2895   MPI_Status        *status;
2896   MatScalar         *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i;
2897   FreeSpaceList     free_space=PETSC_NULL,current_space=PETSC_NULL;
2898   Mat_Merge_SeqsToMPI *merge;
2899 
2900   PetscFunctionBegin;
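  /* Overview: (1) exchange the ij structure of the rows each process contributes to other
     owners, (2) exchange the corresponding matrix values, (3) symbolically merge the local
     and received rows into a sequential matrix B_seq, (4) concatenate the B_seq's into the
     parallel matrix via MatMerge(), and (5) insert the summed values. */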
2901   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2902   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2903   if (scall == MAT_INITIAL_MATRIX){
2904     ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
2905   } else if (scall == MAT_REUSE_MATRIX){
2906     B_mpi = *mpimat;
2907     merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr;
2908     bi      = merge->bi;
2909     bj      = merge->bj;
2910     ijbuf_r = merge->ijbuf_r;
2911   } else {
2912     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2913   }
2914   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2915 
2916   /* determine the number of messages to send and their lengths */
2917   /*---------------------------------------------------------*/
2918   if (scall == MAT_INITIAL_MATRIX){
2919     ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
2920     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
2921     ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
2922     ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr);
2923     ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr);
2924   }
2925   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2926   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2927 
2928   len_sa  = merge->len_sra;
2929   len_ra = merge->len_sra + size;
2930 
2931   if (scall == MAT_INITIAL_MATRIX){
2932     merge->nsend = 0;
2933     len = len_a = 0;
2934     for (proc=0; proc<size; proc++){
2935       if (proc == rank) continue;
2936       len_s[proc] = len_sa[proc] = 0;
2937       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2938         len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */
2939       }
2940       if (len_sa[proc]) {
2941         merge->nsend++;
2942         len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc];
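        /* the ij message to [proc] holds one running nonzero count per row plus all of their column indices */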
2943         if (len < len_s[proc]) len = len_s[proc];
2944         if (len_a < len_sa[proc]) len_a = len_sa[proc];
2945       }
2946     }
2947 
2948     /* determine the number and length of messages to receive */
2949     /*--------------------------------------------------------*/
2950     ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
2951     ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr);
2952 
2953     /* post the Irecvs corresponding to these messages */
2954     /*-------------------------------------------------*/
2955     /* get new tag to keep the communication clean */
2956     ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr);
2957     ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr);
2958 
2959     /* post the sends */
2960     /*----------------*/
2961     ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2962     ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr);
2963     k = 0;
2964     for (proc=0; proc<size; proc++){
2965       if (!len_s[proc]) continue;
2966       /* form outgoing ij data to be sent to [proc] */
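      /* ijbuf_s layout: the first (owners[proc+1]-owners[proc]) entries are the running
         nonzero counts of the rows being sent, followed by the column indices of those rows */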
2967       nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc];
2968       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2969         anzi = ai[i+1] - ai[i];
2970         if (i==owners[proc]){
2971           ijbuf_s[i-owners[proc]] = anzi;
2972         } else {
2973           ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi;
2974         }
2975         for (j=0; j <anzi; j++){
2976           *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++;
2977         }
2978       }
2979       ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr);
2980       k++;
2981     }
2982 
2983     /* wait for the receives and sends of the ij-structure to complete, then free the send buffers */
2984     /*----------------------------------------------------------------------*/
2985     ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2986     ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2987 
2988     ierr = PetscFree(ijbuf_s);CHKERRQ(ierr);
2989     ierr = PetscFree(s_waits);CHKERRQ(ierr);
2990     ierr = PetscFree(r_waits);CHKERRQ(ierr);
2991   }
2992 
2993   /* send and recv matrix values */
2994   /*-----------------------------*/
2995   if (scall == MAT_INITIAL_MATRIX){
2996     ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr);
2997     for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */
2998   }
2999   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
3000   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr);
3001 
3002   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr);
3003   k = 0;
3004   for (proc=0; proc<size; proc++){
3005     if (!len_sa[proc]) continue;
3006     /* form outgoing mat value data to be sent to [proc] */
3007     abuf_i = merge->abuf_s;
3008     for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
3009       anzi = ai[i+1] - ai[i];
3010       for (j=0; j <anzi; j++){
3011         *abuf_i = *(aa+ai[i]+j); abuf_i++;
3012       }
3013     }
3014     ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr);
3015     k++;
3016   }
3017 
3018   ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr);
3019   ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr);
3020   ierr = PetscFree(status);CHKERRQ(ierr);
3021 
3022   ierr = PetscFree(s_waitsa);CHKERRQ(ierr);
3023   ierr = PetscFree(r_waitsa);CHKERRQ(ierr);
3024 
3025   /* create seq matrix B_seq in each processor */
3026   /*-------------------------------------------*/
3027   if (scall == MAT_INITIAL_MATRIX){
3028     ierr = PetscFree(len_s);CHKERRQ(ierr);
3029     /* allocate bi array and free space for accumulating nonzero column info */
3030     ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr);
3031     bi[0] = 0;
3032 
3033     ierr = PetscMalloc((N+1)*sizeof(int),&lnk);CHKERRQ(ierr);
3034     ierr = PetscLLInitialize(N,N);CHKERRQ(ierr);
3035 
3036     /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */
3037     len = 0;
3038     for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
3039     ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
3040     current_space = free_space;
3041 
3042     /* determine symbolic info for each row of B_seq */
3043     for (i=0;i<m;i++) {
3044       bnzi   = 0;
3045       /* add local non-zero cols of this proc's seqmat into lnk */
3046       arow   = owners[rank] + i;
3047       anzi   = ai[arow+1] - ai[arow];
3048       aj     = a->j + ai[arow];
3049       ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr);
3050       bnzi += nlnk;
3051       /* add received col data into lnk */
3052       for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3053         /* i-th row */
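        /* each received buffer holds m cumulative row counts followed by column indices,
           so row i starts at offset m plus the cumulative count of row i-1 */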
3054         if (i == 0){
3055           anzi = *(ijbuf_r[k]+i);
3056           aj   = ijbuf_r[k] + m;
3057         } else {
3058           anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3059           aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3060         }
3061         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr);
3062         bnzi += nlnk;
3063       }
3064 
3065       /* if free space is not available, make more free space */
3066       if (current_space->local_remaining<bnzi) {
3067         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3068         nspacedouble++;
3069       }
3070       /* copy data into free space, then initialize lnk */
3071       ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array);CHKERRQ(ierr);
3072       current_space->array           += bnzi;
3073       current_space->local_used      += bnzi;
3074       current_space->local_remaining -= bnzi;
3075 
3076       bi[i+1] = bi[i] + bnzi;
3077     }
3078     ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
3079     ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3080 
3081     ierr = PetscFree(lnk);CHKERRQ(ierr);
3082     ierr = PetscFree(len_r);CHKERRQ(ierr);
3083 
3084     /* allocate space for ba */
3085     ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3086     ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3087 
3088     /* put together B_seq */
3089     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
3090     /* bi and bj are saved for reuse; ba is freed only after MatMerge() below, since B_seq still references it */
3091 
3092     /* create symbolic parallel matrix B_mpi by concatenating the B_seq matrices */
3093     /*--------------------------------------------------------------*/
3094     ierr = MatMerge(comm,B_seq,scall,&B_mpi);CHKERRQ(ierr);
         ierr = PetscFree(ba);CHKERRQ(ierr); /* safe to free now: B_seq, which used ba, has been destroyed inside MatMerge() */
3095   } /* end of if (scall == MAT_INITIAL_MATRIX) */
3096 
3097   /* insert mat values of B_mpi */
3098   /*----------------------------*/
3099   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3100 
3101   /* compute the values of each local row in ba_i and insert them into B_mpi */
3102   for (i=0; i<m; i++) {
3103     arow = owners[rank] + i;
3104     bj_i = bj+bi[i];  /* col indices of the i-th row of B_seq */
3105     bnzi = bi[i+1] - bi[i];
3106     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3107 
3108     /* add local non-zero vals of this proc's seqmat into ba */
3109     anzi = ai[arow+1] - ai[arow];
3110     aj   = a->j + ai[arow];
3111     aa   = a->a + ai[arow];
3112     nextaj = 0;
3113     for (j=0; nextaj<anzi; j++){
3114       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3115         ba_i[j] += aa[nextaj++];
3116       }
3117     }
3118 
3119     /* add received vals into ba */
3120     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3121       /* i-th row */
3122       if (i == 0){
3123         anzi = *(ijbuf_r[k]+i);
3124         aj   = ijbuf_r[k] + m;
3125         aa   = abuf_r[k];
3126       } else {
3127         anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3128         aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3129         aa   = abuf_r[k] + *(ijbuf_r[k]+i-1);
3130       }
3131       nextaj = 0;
3132       for (j=0; nextaj<anzi; j++){
3133         if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3134           ba_i[j] += aa[nextaj++];
3135         }
3136       }
3137     }
3138 
3139     ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3140   }
3141   ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3142   ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3143 
3144   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3145   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3146 
3147   /* attach the supporting struct to B_mpi for reuse */
3148   /*-------------------------------------------------*/
3149   if (scall == MAT_INITIAL_MATRIX){
3150     B_mpi->spptr         = (void*)merge;
3151     B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3152     merge->bi            = bi;
3153     merge->bj            = bj;
3154     merge->ijbuf_r       = ijbuf_r;
3155 
3156     *mpimat = B_mpi;
3157   }
3158   PetscFunctionReturn(0);
3159 }
3160