xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4ce68768e41e382c1fc596e135fd299def09fddc)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it, it is not scalable (each processor
10 has an order N integer array) but is fast to access.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   int        n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(int));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
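/*
   A minimal usage sketch (illustration only, not part of the library source): once
   the colmap exists, a global column number gcol (a hypothetical caller-side index)
   is translated into a local column number of the off-diagonal block B with the
   same pattern used below in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ().
   Entries are stored shifted by one so that a result of -1 after the decrement
   means "this column is not present in B".

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/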
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (int*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(int));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (int*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(int));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
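/*
   The two macros above implement the same sorted-row insertion for the diagonal (A)
   and off-diagonal (B) blocks.  A stripped-down sketch of the logic, with the
   reallocation and error paths omitted (illustration only):

     low = 0; high = nrow;
     while (high-low > 5) {                      binary search narrows the window
       t = (low+high)/2;
       if (rp[t] > col) high = t; else low = t;
     }
     for (_i=low; _i<high; _i++) {               linear scan inside the window
       if (rp[_i] == col) { ap[_i] = (addv == ADD_VALUES) ? ap[_i]+value : value; goto done; }
       if (rp[_i] >  col) break;
     }
     for (ii=nrow-1; ii>=_i; ii--) { rp[ii+1] = rp[ii]; ap[ii+1] = ap[ii]; }
     rp[_i] = col; ap[_i] = value; nrow++;
     done: ;

   When a row is already full (nrow >= rmax) the macros first enlarge the storage by
   CHUNKSIZE entries with a fresh PetscMalloc() and copy the old row data across.
*/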
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar  value;
190   PetscErrorCode ierr;
191   int          i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some variables required by the macros above */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
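/*
   A hedged caller-side sketch (not part of this file): users pass global row and
   column indices; entries in locally owned rows are routed to aij->A or aij->B
   above, while entries for rows owned elsewhere are stashed and exchanged during
   assembly.  Here grow, gcol and val are hypothetical names for a single entry.

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/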
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   int        i,j,rstart = aij->rstart,rend = aij->rend;
278   int        cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   int         nstash,reallocs;
322   InsertMode  addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   int         i,j,rstart,ncols,n,flg;
351   int         *row,*col,other_disassembled;
352   PetscScalar *val;
353   InsertMode  addv = mat->insertmode;
354 
355   PetscFunctionBegin;
356   if (!aij->donotstash) {
357     while (1) {
358       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
359       if (!flg) break;
360 
361       for (i=0; i<n;) {
362         /* Now identify the consecutive vals belonging to the same row */
363         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
364         if (j < n) ncols = j-i;
365         else       ncols = n-i;
366         /* Now assemble all these values with a single function call */
367         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
368         i = j;
369       }
370     }
371     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
372   }
373 
374   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
376 
377   /* determine if any processor has disassembled; if so, we must
378      also disassemble ourselves, in order that we may reassemble. */
379   /*
380      if nonzero structure of submatrix B cannot change then we know that
381      no processor disassembled thus we can skip this stuff
382   */
383   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
384     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
385     if (mat->was_assembled && !other_disassembled) {
386       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
387     }
388   }
389 
390   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
391     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
392   }
393   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
395 
396   if (aij->rowvalues) {
397     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
398     aij->rowvalues = 0;
399   }
400 
401   /* used by MatAXPY() */
402   a->xtoy = 0; b->xtoy = 0;
403   a->XtoY = 0; b->XtoY = 0;
404 
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
411 {
412   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413   PetscErrorCode ierr;
414 
415   PetscFunctionBegin;
416   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
417   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatZeroRows_MPIAIJ"
423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
424 {
425   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
426   PetscErrorCode ierr;
427   int            i,N,*rows,*owners = l->rowners,size = l->size;
428   int            *nprocs,j,idx,nsends,row;
429   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
430   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
431   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
432   MPI_Comm       comm = A->comm;
433   MPI_Request    *send_waits,*recv_waits;
434   MPI_Status     recv_status,*send_status;
435   IS             istmp;
436   PetscTruth     found;
437 
438   PetscFunctionBegin;
439   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
440   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
441 
442   /*  first count number of contributors to each processor */
443   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
444   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
445   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
446   for (i=0; i<N; i++) {
447     idx = rows[i];
448     found = PETSC_FALSE;
449     for (j=0; j<size; j++) {
450       if (idx >= owners[j] && idx < owners[j+1]) {
451         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
452       }
453     }
454     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
455   }
456   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
457 
458   /* inform other processors of number of messages and max length*/
459   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
463   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
464   for (i=0; i<nrecvs; i++) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues for stuff going to
470          the ith processor
471   */
472   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
474   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
475   starts[0] = 0;
476   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
477   for (i=0; i<N; i++) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
481 
482   starts[0] = 0;
483   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
484   count = 0;
485   for (i=0; i<size; i++) {
486     if (nprocs[2*i+1]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]    = n;
504     slen          += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
511   count = 0;
512   for (i=0; i<nrecvs; i++) {
513     values = rvalues + i*nmax;
514     for (j=0; j<lens[i]; j++) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PetscLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal we use separate
530      code so that MatSetValues() is not called for each diagonal entry,
531      which would allocate new memory with many mallocs and slow things down.
532 
533        Contributed by: Mathew Knepley
534   */
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for (i = 0; i < slen; i++) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
560     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
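/*
   A hedged usage sketch for the routine above: rows to zero are listed by global
   index in an IS, and a process may list rows it does not own since they are
   forwarded to the owning process.  nlocal, grows and one (a PetscScalar set to 1.0)
   are hypothetical caller-side names; the calling sequence MatZeroRows(Mat,IS,
   const PetscScalar*) is the one used above.

     ierr = ISCreateGeneral(A->comm,nlocal,grows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/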
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatMult_MPIAIJ"
571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
574   PetscErrorCode ierr;
575   int        nt;
576 
577   PetscFunctionBegin;
578   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
579   if (nt != A->n) {
580     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
581   }
582   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
584   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
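/*
   In words: with the MPIAIJ row distribution the local part of the product is

      y_local = A_diag * x_local + B_off * x_ghost

   where x_ghost (a->lvec) holds the off-process entries of x gathered by the
   VecScatter a->Mvctx.  The scatter is started before the diagonal-block multiply
   so that the communication overlaps the local computation.
*/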
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMultAdd_MPIAIJ"
591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
594   PetscErrorCode ierr;
595 
596   PetscFunctionBegin;
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609   PetscErrorCode ierr;
610 
611   PetscFunctionBegin;
612   /* do nondiagonal part */
613   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
614   /* send it on its way */
615   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616   /* do local part */
617   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
618   /* receive remote parts: note this assumes the values are not actually */
619   /* inserted in yy until the next line, which is true for my implementation*/
620   /* but may not always be true. */
621   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 EXTERN_C_BEGIN
626 #undef __FUNCT__
627 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
629 {
630   MPI_Comm comm;
631   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
632   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
633   IS         Me,Notme;
634   PetscErrorCode ierr;
635   int        M,N,first,last,*notme,ntids,i;
636 
637   PetscFunctionBegin;
638 
639   /* Easy test: symmetric diagonal block */
640   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
641   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
642   if (!*f) PetscFunctionReturn(0);
643   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
644   ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
645   if (ntids==1) PetscFunctionReturn(0);
646 
647   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
648   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
649   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
650   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
651   for (i=0; i<first; i++) notme[i] = i;
652   for (i=last; i<M; i++) notme[i-last+first] = i;
653   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
654   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
655   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
656   Aoff = Aoffs[0];
657   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
658   Boff = Boffs[0];
659   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
660   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
661   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
662   ierr = ISDestroy(Me);CHKERRQ(ierr);
663   ierr = ISDestroy(Notme);CHKERRQ(ierr);
664 
665   PetscFunctionReturn(0);
666 }
667 EXTERN_C_END
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
672 {
673   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
674   PetscErrorCode ierr;
675 
676   PetscFunctionBegin;
677   /* do nondiagonal part */
678   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
679   /* send it on its way */
680   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
681   /* do local part */
682   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
683   /* receive remote parts: note this assumes the values are not actually */
684   /* inserted in zz until the next line, which is true for my implementation */
685   /* but may not always be true. */
686   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 /*
691   This only works correctly for square matrices where the subblock A->A is the
692    diagonal block
693 */
694 #undef __FUNCT__
695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
697 {
698   PetscErrorCode ierr;
699   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
700 
701   PetscFunctionBegin;
702   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
703   if (a->rstart != a->cstart || a->rend != a->cend) {
704     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
705   }
706   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatScale_MPIAIJ"
712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
713 {
714   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
719   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatDestroy_MPIAIJ"
725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
726 {
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731 #if defined(PETSC_USE_LOG)
732   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
733 #endif
734   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
735   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
736   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
737   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
738 #if defined (PETSC_USE_CTABLE)
739   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
740 #else
741   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
742 #endif
743   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
744   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
745   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
746   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
747   ierr = PetscFree(aij);CHKERRQ(ierr);
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatView_MPIAIJ_Binary"
753 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
754 {
755   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
756   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
757   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
758   PetscErrorCode ierr;
759   int               nz,fd,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
760   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
761   PetscScalar       *column_values;
762 
763   PetscFunctionBegin;
764   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
765   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
766   nz   = A->nz + B->nz;
767   if (!rank) {
768     header[0] = MAT_FILE_COOKIE;
769     header[1] = mat->M;
770     header[2] = mat->N;
771     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
773     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
774     /* get largest number of rows any processor has */
775     rlen = mat->m;
776     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
777     for (i=1; i<size; i++) {
778       rlen = PetscMax(rlen,range[i+1] - range[i]);
779     }
780   } else {
781     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
782     rlen = mat->m;
783   }
784 
785   /* load up the local row counts */
786   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
787   for (i=0; i<mat->m; i++) {
788     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
789   }
790 
791   /* store the row lengths to the file */
792   if (!rank) {
793     MPI_Status status;
794     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
795     for (i=1; i<size; i++) {
796       rlen = range[i+1] - range[i];
797       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
798       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
799     }
800   } else {
801     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
802   }
803   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
804 
805   /* load up the local column indices */
806   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
807   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
808   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
809   cnt  = 0;
810   for (i=0; i<mat->m; i++) {
811     for (j=B->i[i]; j<B->i[i+1]; j++) {
812       if ( (col = garray[B->j[j]]) > cstart) break;
813       column_indices[cnt++] = col;
814     }
815     for (k=A->i[i]; k<A->i[i+1]; k++) {
816       column_indices[cnt++] = A->j[k] + cstart;
817     }
818     for (; j<B->i[i+1]; j++) {
819       column_indices[cnt++] = garray[B->j[j]];
820     }
821   }
822   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
823 
824   /* store the column indices to the file */
825   if (!rank) {
826     MPI_Status status;
827     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
828     for (i=1; i<size; i++) {
829       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
830       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
831       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
832       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
833     }
834   } else {
835     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
836     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
837   }
838   ierr = PetscFree(column_indices);CHKERRQ(ierr);
839 
840   /* load up the local column values */
841   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
842   cnt  = 0;
843   for (i=0; i<mat->m; i++) {
844     for (j=B->i[i]; j<B->i[i+1]; j++) {
845       if ( garray[B->j[j]] > cstart) break;
846       column_values[cnt++] = B->a[j];
847     }
848     for (k=A->i[i]; k<A->i[i+1]; k++) {
849       column_values[cnt++] = A->a[k];
850     }
851     for (; j<B->i[i+1]; j++) {
852       column_values[cnt++] = B->a[j];
853     }
854   }
855   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
856 
857   /* store the column values to the file */
858   if (!rank) {
859     MPI_Status status;
860     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
861     for (i=1; i<size; i++) {
862       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
863       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
864       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
865       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
866     }
867   } else {
868     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
869     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
870   }
871   ierr = PetscFree(column_values);CHKERRQ(ierr);
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
877 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
878 {
879   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
880   PetscErrorCode ierr;
881   int               rank = aij->rank,size = aij->size;
882   PetscTruth        isdraw,iascii,flg,isbinary;
883   PetscViewer       sviewer;
884   PetscViewerFormat format;
885 
886   PetscFunctionBegin;
887   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
888   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
889   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
890   if (iascii) {
891     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
892     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
893       MatInfo info;
894       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
895       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
896       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
897       if (flg) {
898         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
899 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
900       } else {
901         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
902 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
903       }
904       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
905       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
906       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
907       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
908       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
909       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
910       PetscFunctionReturn(0);
911     } else if (format == PETSC_VIEWER_ASCII_INFO) {
912       PetscFunctionReturn(0);
913     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
914       PetscFunctionReturn(0);
915     }
916   } else if (isbinary) {
917     if (size == 1) {
918       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
919       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
920     } else {
921       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
922     }
923     PetscFunctionReturn(0);
924   } else if (isdraw) {
925     PetscDraw  draw;
926     PetscTruth isnull;
927     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
928     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
929   }
930 
931   if (size == 1) {
932     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934   } else {
935     /* assemble the entire matrix onto first processor. */
936     Mat         A;
937     Mat_SeqAIJ *Aloc;
938     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
939     PetscScalar *a;
940 
941     if (!rank) {
942       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
943     } else {
944       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     PetscLogObjectParent(mat,A);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = aij->rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = aij->rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to make this call to draw the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1017 {
1018   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec          bb1;
1021   PetscScalar  mone=-1.0;
1022 
1023   PetscFunctionBegin;
1024   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1025 
1026   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1027 
1028   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1029     if (flag & SOR_ZERO_INITIAL_GUESS) {
1030       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1031       its--;
1032     }
1033 
1034     while (its--) {
1035       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1037 
1038       /* update rhs: bb1 = bb - B*x */
1039       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1040       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1041 
1042       /* local sweep */
1043       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1044       CHKERRQ(ierr);
1045     }
1046   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1047     if (flag & SOR_ZERO_INITIAL_GUESS) {
1048       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1049       its--;
1050     }
1051     while (its--) {
1052       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054 
1055       /* update rhs: bb1 = bb - B*x */
1056       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1057       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1058 
1059       /* local sweep */
1060       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1061       CHKERRQ(ierr);
1062     }
1063   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1064     if (flag & SOR_ZERO_INITIAL_GUESS) {
1065       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1066       its--;
1067     }
1068     while (its--) {
1069       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071 
1072       /* update rhs: bb1 = bb - B*x */
1073       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1074       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1075 
1076       /* local sweep */
1077       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1078       CHKERRQ(ierr);
1079     }
1080   } else {
1081     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1082   }
1083 
1084   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
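/*
   In words: each outer iteration of the "local" SOR variants above rebuilds the
   right-hand side as

      bb1 = bb - B_off * x_ghost

   (the VecScale by -1.0 followed by the MatMultAdd of the off-diagonal block) and
   then applies the requested sequential sweep of the diagonal block A to bb1, so
   the coupling to off-process unknowns is handled in a block-Jacobi fashion
   between sweeps.
*/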
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1090 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1091 {
1092   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1093   Mat        A = mat->A,B = mat->B;
1094   PetscErrorCode ierr;
1095   PetscReal  isend[5],irecv[5];
1096 
1097   PetscFunctionBegin;
1098   info->block_size     = 1.0;
1099   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1100   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1101   isend[3] = info->memory;  isend[4] = info->mallocs;
1102   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1103   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1104   isend[3] += info->memory;  isend[4] += info->mallocs;
1105   if (flag == MAT_LOCAL) {
1106     info->nz_used      = isend[0];
1107     info->nz_allocated = isend[1];
1108     info->nz_unneeded  = isend[2];
1109     info->memory       = isend[3];
1110     info->mallocs      = isend[4];
1111   } else if (flag == MAT_GLOBAL_MAX) {
1112     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1113     info->nz_used      = irecv[0];
1114     info->nz_allocated = irecv[1];
1115     info->nz_unneeded  = irecv[2];
1116     info->memory       = irecv[3];
1117     info->mallocs      = irecv[4];
1118   } else if (flag == MAT_GLOBAL_SUM) {
1119     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1120     info->nz_used      = irecv[0];
1121     info->nz_allocated = irecv[1];
1122     info->nz_unneeded  = irecv[2];
1123     info->memory       = irecv[3];
1124     info->mallocs      = irecv[4];
1125   }
1126   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1127   info->fill_ratio_needed = 0;
1128   info->factor_mallocs    = 0;
1129   info->rows_global       = (double)matin->M;
1130   info->columns_global    = (double)matin->N;
1131   info->rows_local        = (double)matin->m;
1132   info->columns_local     = (double)matin->N;
1133 
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatSetOption_MPIAIJ"
1139 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1140 {
1141   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   switch (op) {
1146   case MAT_NO_NEW_NONZERO_LOCATIONS:
1147   case MAT_YES_NEW_NONZERO_LOCATIONS:
1148   case MAT_COLUMNS_UNSORTED:
1149   case MAT_COLUMNS_SORTED:
1150   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1151   case MAT_KEEP_ZEROED_ROWS:
1152   case MAT_NEW_NONZERO_LOCATION_ERR:
1153   case MAT_USE_INODES:
1154   case MAT_DO_NOT_USE_INODES:
1155   case MAT_IGNORE_ZERO_ENTRIES:
1156     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1157     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1158     break;
1159   case MAT_ROW_ORIENTED:
1160     a->roworiented = PETSC_TRUE;
1161     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1162     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1163     break;
1164   case MAT_ROWS_SORTED:
1165   case MAT_ROWS_UNSORTED:
1166   case MAT_YES_NEW_DIAGONALS:
1167     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1168     break;
1169   case MAT_COLUMN_ORIENTED:
1170     a->roworiented = PETSC_FALSE;
1171     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1172     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1173     break;
1174   case MAT_IGNORE_OFF_PROC_ENTRIES:
1175     a->donotstash = PETSC_TRUE;
1176     break;
1177   case MAT_NO_NEW_DIAGONALS:
1178     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1179   case MAT_SYMMETRIC:
1180   case MAT_STRUCTURALLY_SYMMETRIC:
1181   case MAT_HERMITIAN:
1182   case MAT_SYMMETRY_ETERNAL:
1183     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1184     break;
1185   case MAT_NOT_SYMMETRIC:
1186   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1187   case MAT_NOT_HERMITIAN:
1188   case MAT_NOT_SYMMETRY_ETERNAL:
1189     break;
1190   default:
1191     SETERRQ(PETSC_ERR_SUP,"unknown option");
1192   }
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatGetRow_MPIAIJ"
1198 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1199 {
1200   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1201   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1202   PetscErrorCode ierr;
1203   int          i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1204   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1205   int          *cmap,*idx_p;
1206 
1207   PetscFunctionBegin;
1208   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1209   mat->getrowactive = PETSC_TRUE;
1210 
1211   if (!mat->rowvalues && (idx || v)) {
1212     /*
1213         allocate enough space to hold information from the longest row.
1214     */
1215     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1216     int     max = 1,tmp;
1217     for (i=0; i<matin->m; i++) {
1218       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1219       if (max < tmp) { max = tmp; }
1220     }
1221     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1222     mat->rowindices = (int*)(mat->rowvalues + max);
1223   }
1224 
1225   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1226   lrow = row - rstart;
1227 
1228   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1229   if (!v)   {pvA = 0; pvB = 0;}
1230   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1231   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1232   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1233   nztot = nzA + nzB;
1234 
1235   cmap  = mat->garray;
1236   if (v  || idx) {
1237     if (nztot) {
1238       /* Sort by increasing column numbers, assuming A and B already sorted */
1239       int imark = -1;
1240       if (v) {
1241         *v = v_p = mat->rowvalues;
1242         for (i=0; i<nzB; i++) {
1243           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1244           else break;
1245         }
1246         imark = i;
1247         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1248         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1249       }
1250       if (idx) {
1251         *idx = idx_p = mat->rowindices;
1252         if (imark > -1) {
1253           for (i=0; i<imark; i++) {
1254             idx_p[i] = cmap[cworkB[i]];
1255           }
1256         } else {
1257           for (i=0; i<nzB; i++) {
1258             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1259             else break;
1260           }
1261           imark = i;
1262         }
1263         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1264         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1265       }
1266     } else {
1267       if (idx) *idx = 0;
1268       if (v)   *v   = 0;
1269     }
1270   }
1271   *nz = nztot;
1272   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1273   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1279 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1280 {
1281   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1282 
1283   PetscFunctionBegin;
1284   if (aij->getrowactive == PETSC_FALSE) {
1285     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1286   }
1287   aij->getrowactive = PETSC_FALSE;
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatNorm_MPIAIJ"
1293 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1294 {
1295   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1296   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1297   PetscErrorCode ierr;
1298   int          i,j,cstart = aij->cstart;
1299   PetscReal    sum = 0.0;
1300   PetscScalar  *v;
1301 
1302   PetscFunctionBegin;
1303   if (aij->size == 1) {
1304     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1305   } else {
1306     if (type == NORM_FROBENIUS) {
1307       v = amat->a;
1308       for (i=0; i<amat->nz; i++) {
1309 #if defined(PETSC_USE_COMPLEX)
1310         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1311 #else
1312         sum += (*v)*(*v); v++;
1313 #endif
1314       }
1315       v = bmat->a;
1316       for (i=0; i<bmat->nz; i++) {
1317 #if defined(PETSC_USE_COMPLEX)
1318         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1319 #else
1320         sum += (*v)*(*v); v++;
1321 #endif
1322       }
1323       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1324       *norm = sqrt(*norm);
1325     } else if (type == NORM_1) { /* max column norm */
1326       PetscReal *tmp,*tmp2;
1327       int    *jj,*garray = aij->garray;
1328       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1329       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1330       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1331       *norm = 0.0;
1332       v = amat->a; jj = amat->j;
1333       for (j=0; j<amat->nz; j++) {
1334         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1335       }
1336       v = bmat->a; jj = bmat->j;
1337       for (j=0; j<bmat->nz; j++) {
1338         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1339       }
1340       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1341       for (j=0; j<mat->N; j++) {
1342         if (tmp2[j] > *norm) *norm = tmp2[j];
1343       }
1344       ierr = PetscFree(tmp);CHKERRQ(ierr);
1345       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1346     } else if (type == NORM_INFINITY) { /* max row norm */
1347       PetscReal ntemp = 0.0;
1348       for (j=0; j<aij->A->m; j++) {
1349         v = amat->a + amat->i[j];
1350         sum = 0.0;
1351         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1352           sum += PetscAbsScalar(*v); v++;
1353         }
1354         v = bmat->a + bmat->i[j];
1355         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1356           sum += PetscAbsScalar(*v); v++;
1357         }
1358         if (sum > ntemp) ntemp = sum;
1359       }
1360       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1361     } else {
1362       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1363     }
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatTranspose_MPIAIJ"
1370 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1371 {
1372   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1373   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1374   PetscErrorCode ierr;
1375   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1376   Mat          B;
1377   PetscScalar  *array;
1378 
1379   PetscFunctionBegin;
1380   if (!matout && M != N) {
1381     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1382   }
1383 
1384   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1385   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1386   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1387 
1388   /* copy over the A part */
1389   Aloc = (Mat_SeqAIJ*)a->A->data;
1390   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1391   row = a->rstart;
1392   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1393   for (i=0; i<m; i++) {
1394     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1395     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1396   }
1397   aj = Aloc->j;
1398   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1399 
1400   /* copy over the B part */
1401   Aloc = (Mat_SeqAIJ*)a->B->data;
1402   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1403   row  = a->rstart;
1404   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1405   ct   = cols;
1406   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1407   for (i=0; i<m; i++) {
1408     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1409     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1410   }
1411   ierr = PetscFree(ct);CHKERRQ(ierr);
1412   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1413   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1414   if (matout) {
1415     *matout = B;
1416   } else {
1417     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1424 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1425 {
1426   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1427   Mat        a = aij->A,b = aij->B;
1428   PetscErrorCode ierr;
1429   int        s1,s2,s3;
1430 
1431   PetscFunctionBegin;
1432   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1433   if (rr) {
1434     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1435     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1436     /* Overlap communication with computation. */
1437     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1438   }
1439   if (ll) {
1440     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1441     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1442     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1443   }
1444   /* scale the diagonal block */
1445   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1446 
1447   if (rr) {
1448     /* Do a scatter end and then right scale the off-diagonal block */
1449     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1450     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1451   }
1452 
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1459 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1460 {
1461   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1462   PetscErrorCode ierr;
1463 
1464   PetscFunctionBegin;
1465   if (!a->rank) {
1466     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1473 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1474 {
1475   PetscFunctionBegin;
1476   *bs = 1;
1477   PetscFunctionReturn(0);
1478 }
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1481 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1482 {
1483   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1484   PetscErrorCode ierr;
1485 
1486   PetscFunctionBegin;
1487   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatEqual_MPIAIJ"
1493 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1494 {
1495   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1496   Mat        a,b,c,d;
1497   PetscTruth flg;
1498   PetscErrorCode ierr;
1499 
1500   PetscFunctionBegin;
1501   a = matA->A; b = matA->B;
1502   c = matB->A; d = matB->B;
1503 
1504   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1505   if (flg == PETSC_TRUE) {
1506     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1507   }
1508   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatCopy_MPIAIJ"
1514 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1515 {
1516   PetscErrorCode ierr;
1517   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1518   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1519 
1520   PetscFunctionBegin;
1521   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1522   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1523     /* Because of the column compression in the off-processor part of the matrix a->B,
1524        the number of columns in a->B and b->B may differ; hence we cannot call
1525        MatCopy() directly on the two parts. If need be, we could provide a copy more
1526        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1527        then copying the submatrices */
1528     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1529   } else {
1530     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1531     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1538 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1539 {
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #include "petscblaslapack.h"
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatAXPY_MPIAIJ"
1550 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1551 {
1552   PetscErrorCode ierr;
1553   int            i;
1554   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1555   PetscBLASInt   bnz,one=1;
1556   Mat_SeqAIJ     *x,*y;
1557 
1558   PetscFunctionBegin;
1559   if (str == SAME_NONZERO_PATTERN) {
1560     x = (Mat_SeqAIJ *)xx->A->data;
1561     y = (Mat_SeqAIJ *)yy->A->data;
1562     bnz = (PetscBLASInt)x->nz;
1563     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1564     x = (Mat_SeqAIJ *)xx->B->data;
1565     y = (Mat_SeqAIJ *)yy->B->data;
1566     bnz = (PetscBLASInt)x->nz;
1567     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1568   } else if (str == SUBSET_NONZERO_PATTERN) {
1569     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1570 
1571     x = (Mat_SeqAIJ *)xx->B->data;
1572     y = (Mat_SeqAIJ *)yy->B->data;
1573     if (y->xtoy && y->XtoY != xx->B) {
1574       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1575       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1576     }
1577     if (!y->xtoy) { /* get xtoy */
1578       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1579       y->XtoY = xx->B;
1580     }
1581     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1582   } else {
1583     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 /* -------------------------------------------------------------------*/
1589 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1590        MatGetRow_MPIAIJ,
1591        MatRestoreRow_MPIAIJ,
1592        MatMult_MPIAIJ,
1593 /* 4*/ MatMultAdd_MPIAIJ,
1594        MatMultTranspose_MPIAIJ,
1595        MatMultTransposeAdd_MPIAIJ,
1596        0,
1597        0,
1598        0,
1599 /*10*/ 0,
1600        0,
1601        0,
1602        MatRelax_MPIAIJ,
1603        MatTranspose_MPIAIJ,
1604 /*15*/ MatGetInfo_MPIAIJ,
1605        MatEqual_MPIAIJ,
1606        MatGetDiagonal_MPIAIJ,
1607        MatDiagonalScale_MPIAIJ,
1608        MatNorm_MPIAIJ,
1609 /*20*/ MatAssemblyBegin_MPIAIJ,
1610        MatAssemblyEnd_MPIAIJ,
1611        0,
1612        MatSetOption_MPIAIJ,
1613        MatZeroEntries_MPIAIJ,
1614 /*25*/ MatZeroRows_MPIAIJ,
1615 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1616        MatLUFactorSymbolic_MPIAIJ_TFS,
1617 #else
1618        0,
1619 #endif
1620        0,
1621        0,
1622        0,
1623 /*30*/ MatSetUpPreallocation_MPIAIJ,
1624        0,
1625        0,
1626        0,
1627        0,
1628 /*35*/ MatDuplicate_MPIAIJ,
1629        0,
1630        0,
1631        0,
1632        0,
1633 /*40*/ MatAXPY_MPIAIJ,
1634        MatGetSubMatrices_MPIAIJ,
1635        MatIncreaseOverlap_MPIAIJ,
1636        MatGetValues_MPIAIJ,
1637        MatCopy_MPIAIJ,
1638 /*45*/ MatPrintHelp_MPIAIJ,
1639        MatScale_MPIAIJ,
1640        0,
1641        0,
1642        0,
1643 /*50*/ MatGetBlockSize_MPIAIJ,
1644        0,
1645        0,
1646        0,
1647        0,
1648 /*55*/ MatFDColoringCreate_MPIAIJ,
1649        0,
1650        MatSetUnfactored_MPIAIJ,
1651        0,
1652        0,
1653 /*60*/ MatGetSubMatrix_MPIAIJ,
1654        MatDestroy_MPIAIJ,
1655        MatView_MPIAIJ,
1656        MatGetPetscMaps_Petsc,
1657        0,
1658 /*65*/ 0,
1659        0,
1660        0,
1661        0,
1662        0,
1663 /*70*/ 0,
1664        0,
1665        MatSetColoring_MPIAIJ,
1666        MatSetValuesAdic_MPIAIJ,
1667        MatSetValuesAdifor_MPIAIJ,
1668 /*75*/ 0,
1669        0,
1670        0,
1671        0,
1672        0,
1673 /*80*/ 0,
1674        0,
1675        0,
1676        0,
1677 /*84*/ MatLoad_MPIAIJ,
1678        0,
1679        0,
1680        0,
1681        0,
1682 /*89*/ 0,
1683        MatMatMult_MPIAIJ_MPIAIJ,
1684        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1685        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1686 };
1687 
1688 /* ----------------------------------------------------------------------------------------*/
1689 
1690 EXTERN_C_BEGIN
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1693 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1694 {
1695   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1696   PetscErrorCode ierr;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1700   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 EXTERN_C_END
1704 
1705 EXTERN_C_BEGIN
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1708 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1709 {
1710   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1711   PetscErrorCode ierr;
1712 
1713   PetscFunctionBegin;
1714   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1715   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 EXTERN_C_END
1719 
1720 #include "petscpc.h"
1721 EXTERN_C_BEGIN
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1724 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1725 {
1726   Mat_MPIAIJ   *b;
1727   PetscErrorCode ierr;
1728   int          i;
1729 
1730   PetscFunctionBegin;
1731   B->preallocated = PETSC_TRUE;
1732   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1733   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1734   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1735   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1736   if (d_nnz) {
1737     for (i=0; i<B->m; i++) {
1738       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1739     }
1740   }
1741   if (o_nnz) {
1742     for (i=0; i<B->m; i++) {
1743       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1744     }
1745   }
1746   b = (Mat_MPIAIJ*)B->data;
1747   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1748   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1749 
1750   PetscFunctionReturn(0);
1751 }
1752 EXTERN_C_END
1753 
1754 /*MC
1755    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1756 
1757    Options Database Keys:
1758 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1759 
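   Example usage, a minimal sketch (the communicator 'comm' and the global sizes 'M','N' are
   placeholder values, not defined in this file):
.vb
      Mat A;
      MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
.ve
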
1760   Level: beginner
1761 
1762 .seealso: MatCreateMPIAIJ
1763 M*/
1764 
1765 EXTERN_C_BEGIN
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatCreate_MPIAIJ"
1768 PetscErrorCode MatCreate_MPIAIJ(Mat B)
1769 {
1770   Mat_MPIAIJ *b;
1771   PetscErrorCode ierr;
1772   int        i,size;
1773 
1774   PetscFunctionBegin;
1775   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1776 
1777   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1778   B->data         = (void*)b;
1779   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1780   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1781   B->factor       = 0;
1782   B->assembled    = PETSC_FALSE;
1783   B->mapping      = 0;
1784 
1785   B->insertmode      = NOT_SET_VALUES;
1786   b->size            = size;
1787   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1788 
1789   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1790   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1791 
1792   /* the information in the maps duplicates the information computed below, eventually
1793      we should remove the duplicate information that is not contained in the maps */
1794   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1795   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1796 
1797   /* build local table of row and column ownerships */
1798   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1799   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1800   b->cowners = b->rowners + b->size + 2;
1801   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1802   b->rowners[0] = 0;
1803   for (i=2; i<=b->size; i++) {
1804     b->rowners[i] += b->rowners[i-1];
1805   }
1806   b->rstart = b->rowners[b->rank];
1807   b->rend   = b->rowners[b->rank+1];
1808   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1809   b->cowners[0] = 0;
1810   for (i=2; i<=b->size; i++) {
1811     b->cowners[i] += b->cowners[i-1];
1812   }
1813   b->cstart = b->cowners[b->rank];
1814   b->cend   = b->cowners[b->rank+1];
1815 
1816   /* build cache for off array entries formed */
1817   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1818   b->donotstash  = PETSC_FALSE;
1819   b->colmap      = 0;
1820   b->garray      = 0;
1821   b->roworiented = PETSC_TRUE;
1822 
1823   /* stuff used for matrix vector multiply */
1824   b->lvec      = PETSC_NULL;
1825   b->Mvctx     = PETSC_NULL;
1826 
1827   /* stuff for MatGetRow() */
1828   b->rowindices   = 0;
1829   b->rowvalues    = 0;
1830   b->getrowactive = PETSC_FALSE;
1831 
1832   /* Explicitly create 2 MATSEQAIJ matrices. */
1833   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1834   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1835   PetscLogObjectParent(B,b->A);
1836   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1837   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1838   PetscLogObjectParent(B,b->B);
1839 
1840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1841                                      "MatStoreValues_MPIAIJ",
1842                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1844                                      "MatRetrieveValues_MPIAIJ",
1845                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1847 				     "MatGetDiagonalBlock_MPIAIJ",
1848                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1850 				     "MatIsTranspose_MPIAIJ",
1851 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1853 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1854 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1855   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1856 				     "MatDiagonalScaleLocal_MPIAIJ",
1857 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 EXTERN_C_END
1861 
1862 /*MC
1863    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1864 
1865    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1866    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1867    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
1868    for communicators controlling multiple processes.  It is recommended that you call both of
1869    the above preallocation routines for simplicity, as in the sketch below.
1870 
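   For example, the following sketch (with 'comm', 'm', 'n', 'M', 'N' as placeholder values) works for
   any communicator size because both preallocation routines are called; the call that does not apply
   to the resulting type is simply ignored:
.vb
      Mat A;
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,5,PETSC_NULL);
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
.ve
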
1871    Options Database Keys:
1872 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1873 
1874   Level: beginner
1875 
1876 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1877 M*/
1878 
1879 EXTERN_C_BEGIN
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatCreate_AIJ"
1882 PetscErrorCode MatCreate_AIJ(Mat A)
1883 {
1884   PetscErrorCode ierr;
1885   int size;
1886 
1887   PetscFunctionBegin;
1888   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1889   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1890   if (size == 1) {
1891     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1892   } else {
1893     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 EXTERN_C_END
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1901 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1902 {
1903   Mat        mat;
1904   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1905   PetscErrorCode ierr;
1906 
1907   PetscFunctionBegin;
1908   *newmat       = 0;
1909   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1910   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1911   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1912   a    = (Mat_MPIAIJ*)mat->data;
1913 
1914   mat->factor       = matin->factor;
1915   mat->assembled    = PETSC_TRUE;
1916   mat->insertmode   = NOT_SET_VALUES;
1917   mat->preallocated = PETSC_TRUE;
1918 
1919   a->rstart       = oldmat->rstart;
1920   a->rend         = oldmat->rend;
1921   a->cstart       = oldmat->cstart;
1922   a->cend         = oldmat->cend;
1923   a->size         = oldmat->size;
1924   a->rank         = oldmat->rank;
1925   a->donotstash   = oldmat->donotstash;
1926   a->roworiented  = oldmat->roworiented;
1927   a->rowindices   = 0;
1928   a->rowvalues    = 0;
1929   a->getrowactive = PETSC_FALSE;
1930 
1931   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1932   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1933   if (oldmat->colmap) {
1934 #if defined (PETSC_USE_CTABLE)
1935     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1936 #else
1937     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1938     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1939     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1940 #endif
1941   } else a->colmap = 0;
1942   if (oldmat->garray) {
1943     int len;
1944     len  = oldmat->B->n;
1945     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1946     PetscLogObjectMemory(mat,len*sizeof(int));
1947     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1948   } else a->garray = 0;
1949 
1950   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1951   PetscLogObjectParent(mat,a->lvec);
1952   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1953   PetscLogObjectParent(mat,a->Mvctx);
1954   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1955   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1956   PetscLogObjectParent(mat,a->A);
1957   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1958   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1959   PetscLogObjectParent(mat,a->B);
1960   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1961   *newmat = mat;
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #include "petscsys.h"
1966 
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatLoad_MPIAIJ"
1969 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1970 {
1971   Mat          A;
1972   PetscScalar  *vals,*svals;
1973   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1974   MPI_Status   status;
1975   PetscErrorCode ierr;
1976   int          i,nz,j,rstart,rend,fd;
1977   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1978   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1979   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1980 
1981   PetscFunctionBegin;
1982   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1983   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1984   if (!rank) {
1985     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1986     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1987     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1988     if (header[3] < 0) {
1989       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1990     }
1991   }
1992 
1993   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1994   M = header[1]; N = header[2];
1995   /* determine ownership of all rows */
1996   m = M/size + ((M % size) > rank);
1997   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1998   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1999   rowners[0] = 0;
2000   for (i=2; i<=size; i++) {
2001     rowners[i] += rowners[i-1];
2002   }
2003   rstart = rowners[rank];
2004   rend   = rowners[rank+1];
2005 
2006   /* distribute row lengths to all processors */
2007   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
2008   offlens = ourlens + (rend-rstart);
2009   if (!rank) {
2010     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
2011     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2012     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2013     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2014     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2015     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2016   } else {
2017     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2018   }
2019 
2020   if (!rank) {
2021     /* calculate the number of nonzeros on each processor */
2022     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2023     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2024     for (i=0; i<size; i++) {
2025       for (j=rowners[i]; j< rowners[i+1]; j++) {
2026         procsnz[i] += rowlengths[j];
2027       }
2028     }
2029     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2030 
2031     /* determine max buffer needed and allocate it */
2032     maxnz = 0;
2033     for (i=0; i<size; i++) {
2034       maxnz = PetscMax(maxnz,procsnz[i]);
2035     }
2036     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2037 
2038     /* read in my part of the matrix column indices  */
2039     nz   = procsnz[0];
2040     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2041     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2042 
2043     /* read in everyone else's column indices and ship them off */
2044     for (i=1; i<size; i++) {
2045       nz   = procsnz[i];
2046       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2047       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2048     }
2049     ierr = PetscFree(cols);CHKERRQ(ierr);
2050   } else {
2051     /* determine buffer space needed for message */
2052     nz = 0;
2053     for (i=0; i<m; i++) {
2054       nz += ourlens[i];
2055     }
2056     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2057 
2058     /* receive message of column indices */
2059     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2060     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2061     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2062   }
2063 
2064   /* determine column ownership if matrix is not square */
2065   if (N != M) {
2066     n      = N/size + ((N % size) > rank);
2067     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2068     cstart = cend - n;
2069   } else {
2070     cstart = rstart;
2071     cend   = rend;
2072     n      = cend - cstart;
2073   }
2074 
2075   /* loop over local rows, determining number of off diagonal entries */
2076   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2077   jj = 0;
2078   for (i=0; i<m; i++) {
2079     for (j=0; j<ourlens[i]; j++) {
2080       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2081       jj++;
2082     }
2083   }
2084 
2085   /* create our matrix */
2086   for (i=0; i<m; i++) {
2087     ourlens[i] -= offlens[i];
2088   }
2089   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2090   ierr = MatSetType(A,type);CHKERRQ(ierr);
2091   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2092 
2093   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2094   for (i=0; i<m; i++) {
2095     ourlens[i] += offlens[i];
2096   }
2097 
2098   if (!rank) {
2099     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2100 
2101     /* read in my part of the matrix numerical values  */
2102     nz   = procsnz[0];
2103     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2104 
2105     /* insert into matrix */
2106     jj      = rstart;
2107     smycols = mycols;
2108     svals   = vals;
2109     for (i=0; i<m; i++) {
2110       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2111       smycols += ourlens[i];
2112       svals   += ourlens[i];
2113       jj++;
2114     }
2115 
2116     /* read in other processors and ship out */
2117     for (i=1; i<size; i++) {
2118       nz   = procsnz[i];
2119       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2120       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2121     }
2122     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2123   } else {
2124     /* receive numeric values */
2125     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2126 
2127     /* receive message of values */
2128     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2129     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2130     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2131 
2132     /* insert into matrix */
2133     jj      = rstart;
2134     smycols = mycols;
2135     svals   = vals;
2136     for (i=0; i<m; i++) {
2137       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2138       smycols += ourlens[i];
2139       svals   += ourlens[i];
2140       jj++;
2141     }
2142   }
2143   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2144   ierr = PetscFree(vals);CHKERRQ(ierr);
2145   ierr = PetscFree(mycols);CHKERRQ(ierr);
2146   ierr = PetscFree(rowners);CHKERRQ(ierr);
2147 
2148   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2149   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2150   *newmat = A;
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2156 /*
2157     Not great since it makes two copies of the submatrix: first a sequential AIJ matrix
2158   locally, and then the final result obtained by concatenating the local matrices.
2159   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2160 */
2161 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2162 {
2163   PetscErrorCode ierr;
2164   int          i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2165   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2166   Mat          *local,M,Mreuse;
2167   PetscScalar  *vwork,*aa;
2168   MPI_Comm     comm = mat->comm;
2169   Mat_SeqAIJ   *aij;
2170 
2171 
2172   PetscFunctionBegin;
2173   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2175 
2176   if (call ==  MAT_REUSE_MATRIX) {
2177     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2178     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2179     local = &Mreuse;
2180     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2181   } else {
2182     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2183     Mreuse = *local;
2184     ierr   = PetscFree(local);CHKERRQ(ierr);
2185   }
2186 
2187   /*
2188       m - number of local rows
2189       n - number of columns (same on all processors)
2190       rstart - first row in new global matrix generated
2191   */
2192   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2193   if (call == MAT_INITIAL_MATRIX) {
2194     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2195     ii  = aij->i;
2196     jj  = aij->j;
2197 
2198     /*
2199         Determine the number of non-zeros in the diagonal and off-diagonal
2200         portions of the matrix in order to do correct preallocation
2201     */
2202 
2203     /* first get start and end of "diagonal" columns */
2204     if (csize == PETSC_DECIDE) {
2205       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2206       if (mglobal == n) { /* square matrix */
2207 	nlocal = m;
2208       } else {
2209         nlocal = n/size + ((n % size) > rank);
2210       }
2211     } else {
2212       nlocal = csize;
2213     }
2214     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2215     rstart = rend - nlocal;
2216     if (rank == size - 1 && rend != n) {
2217       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2218     }
2219 
2220     /* next, compute all the lengths */
2221     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2222     olens = dlens + m;
2223     for (i=0; i<m; i++) {
2224       jend = ii[i+1] - ii[i];
2225       olen = 0;
2226       dlen = 0;
2227       for (j=0; j<jend; j++) {
2228         if (*jj < rstart || *jj >= rend) olen++;
2229         else dlen++;
2230         jj++;
2231       }
2232       olens[i] = olen;
2233       dlens[i] = dlen;
2234     }
2235     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2236     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2237     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2238     ierr = PetscFree(dlens);CHKERRQ(ierr);
2239   } else {
2240     int ml,nl;
2241 
2242     M = *newmat;
2243     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2244     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2245     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2246     /*
2247          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2248        rather than the slower MatSetValues().
2249     */
2250     M->was_assembled = PETSC_TRUE;
2251     M->assembled     = PETSC_FALSE;
2252   }
2253   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2254   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2255   ii  = aij->i;
2256   jj  = aij->j;
2257   aa  = aij->a;
2258   for (i=0; i<m; i++) {
2259     row   = rstart + i;
2260     nz    = ii[i+1] - ii[i];
2261     cwork = jj;     jj += nz;
2262     vwork = aa;     aa += nz;
2263     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2264   }
2265 
2266   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2267   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2268   *newmat = M;
2269 
2270   /* save submatrix used in processor for next request */
2271   if (call ==  MAT_INITIAL_MATRIX) {
2272     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2273     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2274   }
2275 
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2281 /*@C
2282    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2283    (the default parallel PETSc format).  For good matrix assembly performance
2284    the user should preallocate the matrix storage by setting the parameters
2285    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2286    performance can be increased by more than a factor of 50.
2287 
2288    Collective on MPI_Comm
2289 
2290    Input Parameters:
2291 +  A - the matrix
2292 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2293            (same value is used for all local rows)
2294 .  d_nnz - array containing the number of nonzeros in the various rows of the
2295            DIAGONAL portion of the local submatrix (possibly different for each row)
2296            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2297            The size of this array is equal to the number of local rows, i.e 'm'.
2298            You must leave room for the diagonal entry even if it is zero.
2299 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2300            submatrix (same value is used for all local rows).
2301 -  o_nnz - array containing the number of nonzeros in the various rows of the
2302            OFF-DIAGONAL portion of the local submatrix (possibly different for
2303            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2304            structure. The size of this array is equal to the number
2305            of local rows, i.e 'm'.
2306 
2307    The AIJ format (also called the Yale sparse matrix format or
2308    compressed row storage), is fully compatible with standard Fortran 77
2309    storage.  That is, the stored row and column indices can begin at
2310    either one (as in Fortran) or zero.  See the users manual for details.
2311 
2312    The user MUST specify either the local or global matrix dimensions
2313    (possibly both).
2314 
2315    The parallel matrix is partitioned such that the first m0 rows belong to
2316    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2317    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2318 
2319    The DIAGONAL portion of the local submatrix of a processor can be defined
2320    as the submatrix which is obtained by extracting the part corresponding
2321    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2322    first row that belongs to the processor, and r2 is the last row belonging
2323    to this processor. This is a square mxm matrix. The remaining portion
2324    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2325 
2326    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2327 
2328    By default, this format uses inodes (identical nodes) when possible.
2329    We search for consecutive rows with the same nonzero structure, thereby
2330    reusing matrix information to achieve increased efficiency.
2331 
2332    Options Database Keys:
2333 +  -mat_aij_no_inode  - Do not use inodes
2334 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2335 -  -mat_aij_oneindex - Internally use indexing starting at 1
2336         rather than 0.  Note that when calling MatSetValues(),
2337         the user still MUST index entries starting at 0!
2338 
2339    Example usage:
2340 
2341    Consider the following 8x8 matrix with 34 non-zero values, that is
2342    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2343    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2344    as follows:
2345 
2346 .vb
2347             1  2  0  |  0  3  0  |  0  4
2348     Proc0   0  5  6  |  7  0  0  |  8  0
2349             9  0 10  | 11  0  0  | 12  0
2350     -------------------------------------
2351            13  0 14  | 15 16 17  |  0  0
2352     Proc1   0 18  0  | 19 20 21  |  0  0
2353             0  0  0  | 22 23  0  | 24  0
2354     -------------------------------------
2355     Proc2  25 26 27  |  0  0 28  | 29  0
2356            30  0  0  | 31 32 33  |  0 34
2357 .ve
2358 
2359    This can be represented as a collection of submatrices as:
2360 
2361 .vb
2362       A B C
2363       D E F
2364       G H I
2365 .ve
2366 
2367    Where the submatrices A,B,C are owned by proc0, D,E,F are
2368    owned by proc1, G,H,I are owned by proc2.
2369 
2370    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2371    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2372    The 'M','N' parameters are 8,8, and have the same values on all procs.
2373 
2374    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2375    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2376    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2377    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2378    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2379    matrix, and [DF] as another SeqAIJ matrix.
2380 
2381    When d_nz, o_nz parameters are specified, d_nz storage elements are
2382    allocated for every row of the local diagonal submatrix, and o_nz
2383    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2384    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2385    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2386    In this case, the values of d_nz,o_nz are:
2387 .vb
2388      proc0 : d_nz = 2, o_nz = 2
2389      proc1 : d_nz = 3, o_nz = 2
2390      proc2 : d_nz = 1, o_nz = 4
2391 .ve
2392    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2393    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2394    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2395    34 values.
2396 
2397    When d_nnz, o_nnz parameters are specified, the storage is specified
2398    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2399    In the above case the values for d_nnz,o_nnz are:
2400 .vb
2401      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2402      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2403      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2404 .ve
2405    Here the space allocated is the sum of all the above values, i.e. 34, and
2406    hence the preallocation is exact.
2407 
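   For illustration, a sketch of the calls that proc0 in the example above might make (the
   communicator 'comm' and the Mat handle 'A' are placeholders, not defined here):
.vb
      int d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      MatCreate(comm,3,3,8,8,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
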
2408    Level: intermediate
2409 
2410 .keywords: matrix, aij, compressed row, sparse, parallel
2411 
2412 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2413 @*/
2414 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2415 {
2416   PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);
2417 
2418   PetscFunctionBegin;
2419   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2420   if (f) {
2421     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2422   }
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatCreateMPIAIJ"
2428 /*@C
2429    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2430    (the default parallel PETSc format).  For good matrix assembly performance
2431    the user should preallocate the matrix storage by setting the parameters
2432    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2433    performance can be increased by more than a factor of 50.
2434 
2435    Collective on MPI_Comm
2436 
2437    Input Parameters:
2438 +  comm - MPI communicator
2439 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2440            This value should be the same as the local size used in creating the
2441            y vector for the matrix-vector product y = Ax.
2442 .  n - This value should be the same as the local size used in creating the
2443        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2444        calculated if N is given) For square matrices n is almost always m.
2445 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2446 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2447 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2448            (same value is used for all local rows)
2449 .  d_nnz - array containing the number of nonzeros in the various rows of the
2450            DIAGONAL portion of the local submatrix (possibly different for each row)
2451            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2452            The size of this array is equal to the number of local rows, i.e 'm'.
2453            You must leave room for the diagonal entry even if it is zero.
2454 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2455            submatrix (same value is used for all local rows).
2456 -  o_nnz - array containing the number of nonzeros in the various rows of the
2457            OFF-DIAGONAL portion of the local submatrix (possibly different for
2458            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2459            structure. The size of this array is equal to the number
2460            of local rows, i.e 'm'.
2461 
2462    Output Parameter:
2463 .  A - the matrix
2464 
2465    Notes:
2466    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2467    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2468    storage requirements for this matrix.
2469 
2470    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2471    processor then it must be used on all processors that share the object for
2472    that argument.
2473 
2474    The AIJ format (also called the Yale sparse matrix format or
2475    compressed row storage), is fully compatible with standard Fortran 77
2476    storage.  That is, the stored row and column indices can begin at
2477    either one (as in Fortran) or zero.  See the users manual for details.
2478 
2479    The user MUST specify either the local or global matrix dimensions
2480    (possibly both).
2481 
2482    The parallel matrix is partitioned such that the first m0 rows belong to
2483    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2484    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2485 
2486    The DIAGONAL portion of the local submatrix of a processor can be defined
2487    as the submatrix which is obtained by extracting the part corresponding
2488    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2489    first row that belongs to the processor, and r2 is the last row belonging
2490    to this processor. This is a square mxm matrix. The remaining portion
2491    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2492 
2493    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2494 
2495    When calling this routine with a single process communicator, a matrix of
2496    type MATSEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
2497    type of communicator, use the construction mechanism:
2498      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2499 
2500    By default, this format uses inodes (identical nodes) when possible.
2501    We search for consecutive rows with the same nonzero structure, thereby
2502    reusing matrix information to achieve increased efficiency.
2503 
2504    Options Database Keys:
2505 +  -mat_aij_no_inode  - Do not use inodes
2506 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2507 -  -mat_aij_oneindex - Internally use indexing starting at 1
2508         rather than 0.  Note that when calling MatSetValues(),
2509         the user still MUST index entries starting at 0!
2510 
2511 
2512    Example usage:
2513 
2514    Consider the following 8x8 matrix with 34 non-zero values, that is
2515    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2516    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2517    as follows:
2518 
2519 .vb
2520             1  2  0  |  0  3  0  |  0  4
2521     Proc0   0  5  6  |  7  0  0  |  8  0
2522             9  0 10  | 11  0  0  | 12  0
2523     -------------------------------------
2524            13  0 14  | 15 16 17  |  0  0
2525     Proc1   0 18  0  | 19 20 21  |  0  0
2526             0  0  0  | 22 23  0  | 24  0
2527     -------------------------------------
2528     Proc2  25 26 27  |  0  0 28  | 29  0
2529            30  0  0  | 31 32 33  |  0 34
2530 .ve
2531 
2532    This can be represented as a collection of submatrices as:
2533 
2534 .vb
2535       A B C
2536       D E F
2537       G H I
2538 .ve
2539 
2540    Where the submatrices A,B,C are owned by proc0, D,E,F are
2541    owned by proc1, G,H,I are owned by proc2.
2542 
2543    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2544    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2545    The 'M','N' parameters are 8,8, and have the same values on all procs.
2546 
2547    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2548    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2549    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2550    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2551    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2552    matrix, and [DF] as another SeqAIJ matrix.
2553 
2554    When d_nz, o_nz parameters are specified, d_nz storage elements are
2555    allocated for every row of the local diagonal submatrix, and o_nz
2556    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2557    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2558    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2559    In this case, the values of d_nz,o_nz are:
2560 .vb
2561      proc0 : d_nz = 2, o_nz = 2
2562      proc1 : d_nz = 3, o_nz = 2
2563      proc2 : d_nz = 1, o_nz = 4
2564 .ve
2565    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2566    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2567    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2568    34 values.
2569 
2570    When d_nnz, o_nnz parameters are specified, the storage is specified
2571    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2572    In the above case the values for d_nnz,o_nnz are:
2573 .vb
2574      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2575      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2576      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2577 .ve
2578    Here the space allocated is the sum of all the above values, i.e. 34, and
2579    hence the preallocation is exact.
2580 
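   For illustration, proc0 in the example above could create its piece of the matrix with a single
   call (the communicator 'comm' and the Mat handle 'A' are placeholders, not defined here):
.vb
      int d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
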
2581    Level: intermediate
2582 
2583 .keywords: matrix, aij, compressed row, sparse, parallel
2584 
2585 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2586 @*/
2587 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2588 {
2589   PetscErrorCode ierr;
2590   int size;
2591 
2592   PetscFunctionBegin;
2593   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2594   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2595   if (size > 1) {
2596     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2597     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2598   } else {
2599     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2600     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2601   }
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
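/*
   MatMPIAIJGetSeqAIJ - returns the local diagonal block Ad, the local off-diagonal block Ao
   (whose columns are stored compressed), and the array colmap (the garray) that maps Ao's
   local column numbers back to global column numbers.
*/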
2607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2608 {
2609   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2610   PetscFunctionBegin;
2611   *Ad     = a->A;
2612   *Ao     = a->B;
2613   *colmap = a->garray;
2614   PetscFunctionReturn(0);
2615 }
2616 
2617 #undef __FUNCT__
2618 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2619 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2620 {
2621   PetscErrorCode ierr;
2622   int        i;
2623   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2624 
2625   PetscFunctionBegin;
2626   if (coloring->ctype == IS_COLORING_LOCAL) {
2627     ISColoringValue *allcolors,*colors;
2628     ISColoring      ocoloring;
2629 
2630     /* set coloring for diagonal portion */
2631     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2632 
2633     /* set coloring for off-diagonal portion */
2634     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2635     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2636     for (i=0; i<a->B->n; i++) {
2637       colors[i] = allcolors[a->garray[i]];
2638     }
2639     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2640     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2641     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2642     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2643   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2644     ISColoringValue *colors;
2645     int             *larray;
2646     ISColoring      ocoloring;
2647 
2648     /* set coloring for diagonal portion */
2649     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2650     for (i=0; i<a->A->n; i++) {
2651       larray[i] = i + a->cstart;
2652     }
2653     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2654     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2655     for (i=0; i<a->A->n; i++) {
2656       colors[i] = coloring->colors[larray[i]];
2657     }
2658     ierr = PetscFree(larray);CHKERRQ(ierr);
2659     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2660     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2661     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2662 
2663     /* set coloring for off-diagonal portion */
2664     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2665     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2666     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2667     for (i=0; i<a->B->n; i++) {
2668       colors[i] = coloring->colors[larray[i]];
2669     }
2670     ierr = PetscFree(larray);CHKERRQ(ierr);
2671     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2672     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2673     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2674   } else {
2675     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2676   }
2677 
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2683 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2684 {
2685   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2686   PetscErrorCode ierr;
2687 
2688   PetscFunctionBegin;
2689   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2690   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 #undef __FUNCT__
2695 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2696 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2697 {
2698   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2699   PetscErrorCode ierr;
2700 
2701   PetscFunctionBegin;
2702   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2703   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 #undef __FUNCT__
2708 #define __FUNCT__ "MatMerge"
2709 /*@C
2710       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2711                  matrices from each processor
2712 
2713     Collective on MPI_Comm
2714 
2715    Input Parameters:
2716 +    comm - the communicator the parallel matrix will live on
2717 .    inmat - the input sequential matrix (one per processor)
2718 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2719 
2720    Output Parameter:
2721 .    outmat - the parallel matrix generated
2722 
2723     Level: advanced
2724 
2725    Notes: The number of columns of the input matrix MUST be the same on EACH processor.
2725           The input matrix is destroyed by this routine.
2726 
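   For illustration, a minimal call sequence (assuming 'seqmat' is a sequential matrix already
   assembled on each process):
.vb
      Mat C;
      MatMerge(PETSC_COMM_WORLD,seqmat,MAT_INITIAL_MATRIX,&C);
.ve
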
2727 @*/
2728 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat)
2729 {
2730   PetscErrorCode ierr;
2731   int               m,n,i,rstart,nnz,I,*dnz,*onz;
2732   const int         *indx;
2733   const PetscScalar *values;
2734   PetscMap          columnmap,rowmap;
2735 
2736   PetscFunctionBegin;
2737 
2738   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2739   if (scall == MAT_INITIAL_MATRIX){
2740     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2741     ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2742     ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2743     ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2744     ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2745     ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2746 
2747     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2748     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2749     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2750     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2751     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2752 
2753     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2754     for (i=0;i<m;i++) {
2755       ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2756       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2757       ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2758     }
2759     /* This routine will ONLY return an MPIAIJ type matrix */
2760     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2761     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2762     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2763     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2764 
2765   } else if (scall == MAT_REUSE_MATRIX){
2766     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2767   } else {
2768     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2769   }
2770 
2771   for (i=0;i<m;i++) {
2772     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2773     I    = i + rstart;
2774     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2775     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2776   }
2777   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2778   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2779   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2780 
2781   PetscFunctionReturn(0);
2782 }
2783 
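/*
   MatFileSplit - Copies the locally owned rows of the parallel matrix A into a
   sequential AIJ matrix on each process and writes that matrix to the binary
   file <outfile>.<rank>, producing one file per process.
*/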
2784 #undef __FUNCT__
2785 #define __FUNCT__ "MatFileSplit"
2786 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2787 {
2788   PetscErrorCode ierr;
2789   int               rank,m,N,i,rstart,nnz;
2790   size_t            len;
2791   const int         *indx;
2792   PetscViewer       out;
2793   char              *name;
2794   Mat               B;
2795   const PetscScalar *values;
2796 
2797   PetscFunctionBegin;
2798 
2799   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2800   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2801   /* Should this be the type of the diagonal block of A? */
2802   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2803   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2804   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2805   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2806   for (i=0;i<m;i++) {
2807     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2808     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2809     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2810   }
2811   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2812   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2813 
2814   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2815   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2816   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2817   sprintf(name,"%s.%d",outfile,rank);
2818   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2819   ierr = PetscFree(name);CHKERRQ(ierr);
2820   ierr = MatView(B,out);CHKERRQ(ierr);
2821   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2822   ierr = MatDestroy(B);CHKERRQ(ierr);
2823   PetscFunctionReturn(0);
2824 }
2825 
2826 typedef struct { /* used by MatMerge_SeqsToMPI for reusing the merged matrix */
2827   PetscMap  rowmap;
2828   int       nrecv,nsend,*id_r,*bi,*bj,**ijbuf_r;
2829   int       *len_sra; /* array of length 2*size: len_sra[i] and len_sra[size+i] hold the
2830                          lengths of the i-th send and receive of matrix values */
2831   MatScalar *abuf_s;
2832 } Mat_Merge_SeqsToMPI;
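/*
   The struct above is attached to the merged matrix (as B_mpi->spptr) on a
   MAT_INITIAL_MATRIX call of MatMerge_SeqsToMPI() so that the symbolic data (bi, bj,
   ijbuf_r) and the send/recv lengths can be reused on later MAT_REUSE_MATRIX calls;
   it is freed by MatDestroy_MPIAIJ_SeqsToMPI() below.
*/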
2833 
2834 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2835 #undef __FUNCT__
2836 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2837 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2838 {
2839   PetscErrorCode ierr;
2840   Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr;
2841 
2842   PetscFunctionBegin;
2843   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2844   ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr);
2845   ierr = PetscFree(merge->len_sra);CHKERRQ(ierr);
2846   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2847   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2848   ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr);
2849   ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2850   ierr = PetscFree(merge);CHKERRQ(ierr);
2851 
2852   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2853   PetscFunctionReturn(0);
2854 }
2855 
2856 #include "src/mat/utils/freespace.h"
2857 #undef __FUNCT__
2858 #define __FUNCT__ "MatMerge_SeqsToMPI"
2859 /*@C
2860       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
2861                  matrices from each processor
2862 
2863     Collective on MPI_Comm
2864 
2865    Input Parameters:
2866 +    comm - the communicator the parallel matrix will live on
2867 .    seqmat - the input sequential matrix (one per processor)
2868 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2869 
2870    Output Parameter:
2871 .    mpimat - the parallel matrix generated
2872 
2873     Level: advanced
2874 
2875    Notes: The dimensions of the sequential matrix on each processor
2876       MUST be the same. A brief usage sketch follows this man page.
2877 
2878 @*/
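/*
   A rough usage sketch (illustrative only; partial and merged are placeholder names).
   Every process supplies a sequential matrix of the full global size M-by-N containing
   its partial contributions; MatMerge_SeqsToMPI() adds the overlapping entries together
   into one parallel MPIAIJ matrix.  With MAT_REUSE_MATRIX the symbolic data attached to
   the merged matrix by the first call is reused and only the numerical values are redone.

       Mat partial,merged;
       ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,10,PETSC_NULL,&partial);CHKERRQ(ierr);
       ...  set this process's contributions and assemble partial  ...
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,partial,MAT_INITIAL_MATRIX,&merged);CHKERRQ(ierr);
       ...  later, with the same nonzero pattern in partial  ...
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,partial,MAT_REUSE_MATRIX,&merged);CHKERRQ(ierr);
*/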
2879 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,MatReuse scall,Mat *mpimat)
2880 {
2881   PetscErrorCode    ierr;
2882   Mat               B_seq,B_mpi;
2883   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)seqmat->data;
2884   int               size,rank,M=seqmat->m,N=seqmat->n,m,i,j,*owners,
2885                     *ai=a->i,*aj=a->j,tag,taga,len,len_a,*len_s,*len_sa,proc,*len_r,*len_ra,
2886                     **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,
2887                     *bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj;
2888   MPI_Request       *s_waits,*r_waits,*s_waitsa,*r_waitsa;
2889   MPI_Status        *status;
2890   MatScalar         *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i;
2891   FreeSpaceList     free_space=PETSC_NULL,current_space=PETSC_NULL;
2892   Mat_Merge_SeqsToMPI *merge;
2893 
2894   PetscFunctionBegin;
2895   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2896   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2897   if (scall == MAT_INITIAL_MATRIX){
2898     ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
2899   } else if (scall == MAT_REUSE_MATRIX){
2900     B_mpi = *mpimat;
2901     merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr;
2902     bi      = merge->bi;
2903     bj      = merge->bj;
2904     ijbuf_r = merge->ijbuf_r;
2905   } else {
2906     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2907   }
2908   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2909 
2910   /* determine the number of messages to send, their lengths */
2911   /*---------------------------------------------------------*/
2912   if (scall == MAT_INITIAL_MATRIX){
2913     ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
2914     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
2915     ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
2916     ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr);
2917     ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr);
2918   }
2919   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2920   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2921 
2922   len_sa  = merge->len_sra;
2923   len_ra = merge->len_sra + size;
2924 
2925   if (scall == MAT_INITIAL_MATRIX){
2926     merge->nsend = 0;
2927     len = len_a = 0;
2928     for (proc=0; proc<size; proc++){
2929       len_s[proc] = len_sa[proc] = 0;
2930       if (proc == rank) continue;
2931       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2932         len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */
2933       }
2934       if (len_sa[proc]) {
2935         merge->nsend++;
2936         len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc];
2937         if (len < len_s[proc]) len = len_s[proc];
2938         if (len_a < len_sa[proc]) len_a = len_sa[proc];
2939       }
2940     }
2941 
2942     /* determine the number and length of messages to receive */
2943     /*--------------------------------------------------------*/
2944     ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
2945     ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr);
2946 
2947     /* post the Irecvs corresponding to these messages */
2948     /*-------------------------------------------------*/
2949     /* get new tag to keep the communication clean */
2950     ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr);
2951     ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr);
2952 
2953     /* post the sends */
2954     /*----------------*/
2955     ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2956     ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr);
2957     k = 0;
2958     for (proc=0; proc<size; proc++){
2959       if (!len_s[proc]) continue;
2960       /* form outgoing ij data to be sent to [proc] */
2961       nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc];
2962       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2963         anzi = ai[i+1] - ai[i];
2964         if (i==owners[proc]){
2965           ijbuf_s[i-owners[proc]] = anzi;
2966         } else {
2967           ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi;
2968         }
2969         for (j=0; j <anzi; j++){
2970           *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++;
2971         }
2972       }
2973       ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr);
2974       k++;
2975     }
2976 
2977     /* receives and sends of the ij-structure are complete; free the send buffer and request arrays */
2978     /*----------------------------------------------------------------------*/
2979     ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2980     ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2981 
2982     ierr = PetscFree(ijbuf_s);CHKERRQ(ierr);
2983     ierr = PetscFree(s_waits);CHKERRQ(ierr);
2984     ierr = PetscFree(r_waits);CHKERRQ(ierr);
2985   }
2986 
2987   /* send and recv matrix values */
2988   /*-----------------------------*/
2989   if (scall == MAT_INITIAL_MATRIX){
2990     ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr);
2991     for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */
2992   }
2993   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2994   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr);
2995 
2996   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr);
2997   k = 0;
2998   for (proc=0; proc<size; proc++){
2999     if (!len_sa[proc]) continue;
3000     /* form outgoing mat value data to be sent to [proc] */
3001     abuf_i = merge->abuf_s;
3002     for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
3003       anzi = ai[i+1] - ai[i];
3004       for (j=0; j <anzi; j++){
3005         *abuf_i = *(aa+ai[i]+j); abuf_i++;
3006       }
3007     }
3008     ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr);
3009     k++;
3010   }
3011 
3012   ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr);
3013   ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr);
3014   ierr = PetscFree(status);CHKERRQ(ierr);
3015 
3016   ierr = PetscFree(s_waitsa);CHKERRQ(ierr);
3017   ierr = PetscFree(r_waitsa);CHKERRQ(ierr);
3018 
3019   /* create seq matrix B_seq in each processor */
3020   /*-------------------------------------------*/
3021   if (scall == MAT_INITIAL_MATRIX){
3022     ierr = PetscFree(len_s);CHKERRQ(ierr);
3023     /* allocate bi array and free space for accumulating nonzero column info */
3024     ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr);
3025     bi[0] = 0;
3026 
3027     ierr = PetscMalloc((N+1)*sizeof(int),&lnk);CHKERRQ(ierr);
3028     ierr = PetscLLInitialize(N,N);CHKERRQ(ierr);
3029 
3030     /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */
3031     len = 0;
3032     for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
3033     ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
3034     current_space = free_space;
3035 
3036     /* determine symbolic info for each row of B_seq */
3037     for (i=0;i<m;i++) {
3038       bnzi   = 0;
3039       /* add local non-zero cols of this proc's seqmat into lnk */
3040       arow   = owners[rank] + i;
3041       anzi   = ai[arow+1] - ai[arow];
3042       aj     = a->j + ai[arow];
3043       ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr);
3044       bnzi += nlnk;
3045       /* add received col data into lnk */
3046       for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3047         /* i-th row */
3048         if (i == 0){
3049           anzi = *(ijbuf_r[k]+i);
3050           aj   = ijbuf_r[k] + m;
3051         } else {
3052           anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3053           aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3054         }
3055         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk);CHKERRQ(ierr);
3056         bnzi += nlnk;
3057       }
3058 
3059       /* if free space is not available, make more free space */
3060       if (current_space->local_remaining<bnzi) {
3061         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3062         nspacedouble++;
3063       }
3064       /* copy data into free space, then initialize lnk */
3065       ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array);CHKERRQ(ierr);
3066       current_space->array           += bnzi;
3067       current_space->local_used      += bnzi;
3068       current_space->local_remaining -= bnzi;
3069 
3070       bi[i+1] = bi[i] + bnzi;
3071     }
3072     ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
3073     ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3074 
3075     ierr = PetscFree(lnk);CHKERRQ(ierr);
3076     ierr = PetscFree(len_r);CHKERRQ(ierr);
3077 
3078     /* allocate space for ba */
3079     ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3080     ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3081 
3082     /* put together B_seq */
3083     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
3084     ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */
3085 
3086     /* create symbolic parallel matrix B_mpi by concatenating B_seq */
3087     /*--------------------------------------------------------------*/
3088     ierr = MatMerge(comm,B_seq,scall,&B_mpi);CHKERRQ(ierr);
3089   } /* end of if (scall == MAT_INITIAL_MATRIX) */
3090 
3091   /* insert mat values of B_mpi */
3092   /*----------------------------*/
3093   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3094 
3095   /* set values of ba */
3096   for (i=0; i<m; i++) {
3097     arow = owners[rank] + i;
3098     bj_i = bj+bi[i];  /* col indices of the i-th row of B_seq */
3099     bnzi = bi[i+1] - bi[i];
3100     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3101 
3102     /* add local non-zero vals of this proc's seqmat into ba */
3103     anzi = ai[arow+1] - ai[arow];
3104     aj   = a->j + ai[arow];
3105     aa   = a->a + ai[arow];
3106     nextaj = 0;
3107     for (j=0; nextaj<anzi; j++){
3108       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3109         ba_i[j] += aa[nextaj++];
3110       }
3111     }
3112 
3113     /* add received vals into ba */
3114     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3115       /* i-th row */
3116       if (i == 0){
3117         anzi = *(ijbuf_r[k]+i);
3118         aj   = ijbuf_r[k] + m;
3119         aa   = abuf_r[k];
3120       } else {
3121         anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3122         aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3123         aa   = abuf_r[k] + *(ijbuf_r[k]+i-1);
3124       }
3125       nextaj = 0;
3126       for (j=0; nextaj<anzi; j++){
3127         if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3128           ba_i[j] += aa[nextaj++];
3129         }
3130       }
3131     }
3132 
3133     ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3134   }
3135   ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3136   ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3137 
3138   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3139   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3140 
3141   /* attach the supporting struct to B_mpi for reuse */
3142   /*-------------------------------------------------*/
3143   if (scall == MAT_INITIAL_MATRIX){
3144     B_mpi->spptr         = (void*)merge;
3145     B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3146     merge->bi            = bi;
3147     merge->bj            = bj;
3148     merge->ijbuf_r       = ijbuf_r;
3149 
3150     *mpimat = B_mpi;
3151   }
3152   PetscFunctionReturn(0);
3153 }
3154