/* /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4bf112e79d3f87ddad6dbf7b300a6f889c1e7d34) */

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable,
at a slightly higher hash-table cost; without it, it is not scalable
(each processor has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
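
/*
  A minimal sketch (not part of the library) of how the colmap built above is
  consumed elsewhere in this file: translate a global column number gcol into
  the local column index of the off-diagonal block B, with -1 meaning "not
  present on this process".  Names here are illustrative only.

      int lcol;
  #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;                          (entries are stored shifted by one)
  #else
      lcol = aij->colmap[gcol] - 1;    (0 in the array means "absent")
  #endif
*/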

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
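
/*
  Both macros above implement the same algorithm: a sorted insert into one CSR
  row, using a short binary search followed by a linear scan, with a
  CHUNKSIZE-sized reallocation of the whole matrix when the row is full.
  A minimal standalone sketch of the per-row logic (hypothetical names, no
  reallocation, shift taken as 0):

      static void RowInsert(int *rp,PetscScalar *ap,int *nrow,int col,PetscScalar value,InsertMode addv)
      {
        int low = 0,high = *nrow,t,i;
        while (high-low > 5) {
          t = (low+high)/2;
          if (rp[t] > col) high = t; else low = t;
        }
        for (i=low; i<high; i++) {
          if (rp[i] > col) break;
          if (rp[i] == col) {
            if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value;
            return;
          }
        }
        for (t=*nrow-1; t>=i; t--) { rp[t+1] = rp[t]; ap[t+1] = ap[t]; }
        rp[i] = col; ap[i] = value; (*nrow)++;
      }

  Growing by a fixed CHUNKSIZE bounds the number of enlargements, but each
  enlargement copies the entire i/j/a storage, which is why accurate
  preallocation matters for performance.
*/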

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  int            i,j,rstart = aij->rstart,rend = aij->rend;
  int            cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift = 0;
  PetscScalar  *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
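
/*
  A typical calling sequence exercising the routine above (a sketch, not
  library documentation).  Rows owned locally are inserted directly into the
  A (diagonal) or B (off-diagonal) block; rows owned elsewhere are stashed
  and shipped during assembly:

      PetscScalar v = 1.0;
      int         row = ...,col = ...;      (any global indices)
      ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/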

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,j,rstart = aij->rstart,rend = aij->rend;
  int            cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  int            i,j,rstart,ncols,n,flg;
  int            *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  int            i,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, thus calling lots of mallocs and slowing
     things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
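
/*
  The communication in MatZeroRows_MPIAIJ() above is the usual PETSc
  rendezvous pattern; schematically (simplified names, a sketch only):

      1. owner[i]    : process p with owners[p] <= rows[i] < owners[p+1]
      2. nprocs[2*p] : how many indices go to p; nprocs[2*p+1] : 0/1 flag
      3. PetscMaxSum(comm,nprocs,&nmax,&nrecvs) tells every process the
         largest message it can receive (nmax) and how many (nrecvs)
      4. post nrecvs MPI_Irecv of length nmax, pack and MPI_Isend the row
         lists, MPI_Waitany each arrival, and subtract owners[rank] to get
         the local row numbers lrows[] that are finally zeroed
*/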

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  int            nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
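
/*
  MatMult_MPIAIJ() above computes yy = A_d xx + B_o x_ghost, where A_d is the
  diagonal block (locally owned columns) and B_o is the off-diagonal block
  whose columns are compressed ghost entries gathered into a->lvec.  The
  scatter is started first so the local product with A_d overlaps the
  communication; only the B_o term needs the ghost values.  The same pattern
  through the public API (a sketch):

      ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
      ierr = MatMult(a->A,xx,yy);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
      ierr = MatMultAdd(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
*/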

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  int            M,N,first,last,*notme,ntids,i;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
  if (ntids==1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
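
/*
  On-disk layout produced by MatView_MPIAIJ_Binary() above, gathered to and
  written by process 0 (integers as PETSC_INT, values as PETSC_SCALAR):

      header[4]          = { MAT_FILE_COOKIE, M, N, global nz }
      row_lengths[M]     = nonzeros per global row (A part plus B part)
      column_indices[nz] = per row: B columns below cstart, then A columns
                           shifted by cstart, then the remaining B columns
      values[nz]         = numerical values in the same order

  Non-root processes ship counts, indices, and values with plain MPI_Send;
  process 0 interleaves the receives with PetscBinaryWrite() calls.
*/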

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
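
/*
  Each "local" SOR variant above is a block-Jacobi outer iteration: the
  off-process coupling enters only through the refreshed right-hand side
  bb1 = bb - B_o x_ghost, after which `lits' sweeps are run on the diagonal
  block alone.  The inner update, written out (a sketch using the names
  above; mone = -1.0):

      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);              (now lvec = -x_ghost)
      ierr = MatMultAdd(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);    (bb1 = bb - B*x_ghost)

  followed by one or more sweeps on mat->A with right-hand side bb1.
*/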

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  int            i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int            nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int            *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
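
/*
  MatGetRow_MPIAIJ() above merges the two local rows back into global column
  order: B-part entries whose global column (cmap[]) is below cstart come
  first, then all A-part entries shifted by cstart, then the remaining B-part
  entries.  For example, with cstart = 10, A columns {0,2} and B global
  columns {3,15}, the returned row is idx = {3,10,12,15}; this relies on the
  A and B rows each being sorted already.
*/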

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  int            i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatTranspose_MPIAIJ"
1372 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1373 {
1374   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1375   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1376   PetscErrorCode ierr;
1377   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1378   Mat          B;
1379   PetscScalar  *array;
1380 
1381   PetscFunctionBegin;
1382   if (!matout && M != N) {
1383     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1384   }
1385 
1386   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1387   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1388   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1389 
1390   /* copy over the A part */
1391   Aloc = (Mat_SeqAIJ*)a->A->data;
1392   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1393   row = a->rstart;
1394   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1395   for (i=0; i<m; i++) {
1396     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1397     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1398   }
1399   aj = Aloc->j;
1400   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1401 
1402   /* copy over the B part */
1403   Aloc = (Mat_SeqAIJ*)a->B->data;
1404   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1405   row  = a->rstart;
1406   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1407   ct   = cols;
1408   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1409   for (i=0; i<m; i++) {
1410     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1411     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1412   }
1413   ierr = PetscFree(ct);CHKERRQ(ierr);
1414   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1415   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1416   if (matout) {
1417     *matout = B;
1418   } else {
1419     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1426 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1427 {
1428   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1429   Mat        a = aij->A,b = aij->B;
1430   PetscErrorCode ierr;
1431   int        s1,s2,s3;
1432 
1433   PetscFunctionBegin;
1434   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1435   if (rr) {
1436     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1437     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1438     /* Overlap communication with computation. */
1439     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1440   }
1441   if (ll) {
1442     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1443     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1444     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1445   }
1446   /* scale the diagonal block */
1447   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1448 
1449   if (rr) {
1450     /* Do a scatter end and then right scale the off-diagonal block */
1451     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1452     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1461 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1462 {
1463   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1464   PetscErrorCode ierr;
1465 
1466   PetscFunctionBegin;
1467   if (!a->rank) {
1468     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1469   }
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1475 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1476 {
1477   PetscFunctionBegin;
1478   *bs = 1;
1479   PetscFunctionReturn(0);
1480 }
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1483 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1484 {
1485   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1486   PetscErrorCode ierr;
1487 
1488   PetscFunctionBegin;
1489   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatEqual_MPIAIJ"
1495 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1496 {
1497   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1498   Mat        a,b,c,d;
1499   PetscTruth flg;
1500   PetscErrorCode ierr;
1501 
1502   PetscFunctionBegin;
1503   a = matA->A; b = matA->B;
1504   c = matB->A; d = matB->B;
1505 
1506   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1507   if (flg == PETSC_TRUE) {
1508     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1509   }
1510   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatCopy_MPIAIJ"
1516 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1517 {
1518   PetscErrorCode ierr;
1519   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1520   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1521 
1522   PetscFunctionBegin;
1523   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1524   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1525     /* because of the column compression in the off-processor part of the matrix a->B,
1526        the number of columns in a->B and b->B may be different; hence we cannot call
1527        MatCopy() directly on the two parts. If need be, we could provide a copy more
1528        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1529        then copying the submatrices */
1530     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1531   } else {
1532     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1533     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1540 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1541 {
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #include "petscblaslapack.h"
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatAXPY_MPIAIJ"
1552 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1553 {
1554   PetscErrorCode ierr;
1555   int            i;
1556   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1557   PetscBLASInt   bnz,one=1;
1558   Mat_SeqAIJ     *x,*y;
1559 
1560   PetscFunctionBegin;
1561   if (str == SAME_NONZERO_PATTERN) {
1562     x = (Mat_SeqAIJ *)xx->A->data;
1563     y = (Mat_SeqAIJ *)yy->A->data;
1564     bnz = (PetscBLASInt)x->nz;
1565     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1566     x = (Mat_SeqAIJ *)xx->B->data;
1567     y = (Mat_SeqAIJ *)yy->B->data;
1568     bnz = (PetscBLASInt)x->nz;
1569     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1570   } else if (str == SUBSET_NONZERO_PATTERN) {
1571     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1572 
1573     x = (Mat_SeqAIJ *)xx->B->data;
1574     y = (Mat_SeqAIJ *)yy->B->data;
1575     if (y->xtoy && y->XtoY != xx->B) {
1576       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1577       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1578     }
1579     if (!y->xtoy) { /* get xtoy */
1580       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1581       y->XtoY = xx->B;
1582     }
1583     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1584   } else {
1585     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 /* -------------------------------------------------------------------*/
1591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1592        MatGetRow_MPIAIJ,
1593        MatRestoreRow_MPIAIJ,
1594        MatMult_MPIAIJ,
1595 /* 4*/ MatMultAdd_MPIAIJ,
1596        MatMultTranspose_MPIAIJ,
1597        MatMultTransposeAdd_MPIAIJ,
1598        0,
1599        0,
1600        0,
1601 /*10*/ 0,
1602        0,
1603        0,
1604        MatRelax_MPIAIJ,
1605        MatTranspose_MPIAIJ,
1606 /*15*/ MatGetInfo_MPIAIJ,
1607        MatEqual_MPIAIJ,
1608        MatGetDiagonal_MPIAIJ,
1609        MatDiagonalScale_MPIAIJ,
1610        MatNorm_MPIAIJ,
1611 /*20*/ MatAssemblyBegin_MPIAIJ,
1612        MatAssemblyEnd_MPIAIJ,
1613        0,
1614        MatSetOption_MPIAIJ,
1615        MatZeroEntries_MPIAIJ,
1616 /*25*/ MatZeroRows_MPIAIJ,
1617 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1618        MatLUFactorSymbolic_MPIAIJ_TFS,
1619 #else
1620        0,
1621 #endif
1622        0,
1623        0,
1624        0,
1625 /*30*/ MatSetUpPreallocation_MPIAIJ,
1626        0,
1627        0,
1628        0,
1629        0,
1630 /*35*/ MatDuplicate_MPIAIJ,
1631        0,
1632        0,
1633        0,
1634        0,
1635 /*40*/ MatAXPY_MPIAIJ,
1636        MatGetSubMatrices_MPIAIJ,
1637        MatIncreaseOverlap_MPIAIJ,
1638        MatGetValues_MPIAIJ,
1639        MatCopy_MPIAIJ,
1640 /*45*/ MatPrintHelp_MPIAIJ,
1641        MatScale_MPIAIJ,
1642        0,
1643        0,
1644        0,
1645 /*50*/ MatGetBlockSize_MPIAIJ,
1646        0,
1647        0,
1648        0,
1649        0,
1650 /*55*/ MatFDColoringCreate_MPIAIJ,
1651        0,
1652        MatSetUnfactored_MPIAIJ,
1653        0,
1654        0,
1655 /*60*/ MatGetSubMatrix_MPIAIJ,
1656        MatDestroy_MPIAIJ,
1657        MatView_MPIAIJ,
1658        MatGetPetscMaps_Petsc,
1659        0,
1660 /*65*/ 0,
1661        0,
1662        0,
1663        0,
1664        0,
1665 /*70*/ 0,
1666        0,
1667        MatSetColoring_MPIAIJ,
1668        MatSetValuesAdic_MPIAIJ,
1669        MatSetValuesAdifor_MPIAIJ,
1670 /*75*/ 0,
1671        0,
1672        0,
1673        0,
1674        0,
1675 /*80*/ 0,
1676        0,
1677        0,
1678        0,
1679 /*84*/ MatLoad_MPIAIJ,
1680        0,
1681        0,
1682        0,
1683        0,
1684 /*89*/ 0,
1685        MatMatMult_MPIAIJ_MPIAIJ,
1686        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1687        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1688        MatPtAP_MPIAIJ_MPIAIJ,
1689 /*94*/ MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1690        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1691 };
1692 
1693 /* ----------------------------------------------------------------------------------------*/
1694 
1695 EXTERN_C_BEGIN
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1698 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1699 {
1700   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1701   PetscErrorCode ierr;
1702 
1703   PetscFunctionBegin;
1704   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1705   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1706   PetscFunctionReturn(0);
1707 }
1708 EXTERN_C_END
1709 
1710 EXTERN_C_BEGIN
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1713 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1714 {
1715   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1720   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 EXTERN_C_END
1724 
1725 #include "petscpc.h"
1726 EXTERN_C_BEGIN
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1729 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1730 {
1731   Mat_MPIAIJ   *b;
1732   PetscErrorCode ierr;
1733   int          i;
1734 
1735   PetscFunctionBegin;
1736   B->preallocated = PETSC_TRUE;
1737   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1738   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1739   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1740   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1741   if (d_nnz) {
1742     for (i=0; i<B->m; i++) {
1743       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1744     }
1745   }
1746   if (o_nnz) {
1747     for (i=0; i<B->m; i++) {
1748       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1749     }
1750   }
1751   b = (Mat_MPIAIJ*)B->data;
1752   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1753   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1754 
1755   PetscFunctionReturn(0);
1756 }
1757 EXTERN_C_END
1758 
1759 /*MC
1760    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1761 
1762    Options Database Keys:
1763 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
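
   Example usage (a minimal sketch; the sizes m,n,M,N and the estimates d_nz,o_nz
   are placeholders, and error checking is omitted):
.vb
     Mat A;
     MatCreate(comm,m,n,M,N,&A);
     MatSetFromOptions(A);                       /* picks up -mat_type mpiaij */
     MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve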
1764 
1765   Level: beginner
1766 
1767 .seealso: MatCreateMPIAIJ
1768 M*/
1769 
1770 EXTERN_C_BEGIN
1771 #undef __FUNCT__
1772 #define __FUNCT__ "MatCreate_MPIAIJ"
1773 PetscErrorCode MatCreate_MPIAIJ(Mat B)
1774 {
1775   Mat_MPIAIJ *b;
1776   PetscErrorCode ierr;
1777   int        i,size;
1778 
1779   PetscFunctionBegin;
1780   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1781 
1782   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1783   B->data         = (void*)b;
1784   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1785   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1786   B->factor       = 0;
1787   B->assembled    = PETSC_FALSE;
1788   B->mapping      = 0;
1789 
1790   B->insertmode      = NOT_SET_VALUES;
1791   b->size            = size;
1792   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1793 
1794   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1795   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1796 
1797   /* the information in the maps duplicates the information computed below, eventually
1798      we should remove the duplicate information that is not contained in the maps */
1799   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1800   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1801 
1802   /* build local table of row and column ownerships */
1803   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1804   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1805   b->cowners = b->rowners + b->size + 2;
1806   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1807   b->rowners[0] = 0;
1808   for (i=2; i<=b->size; i++) {
1809     b->rowners[i] += b->rowners[i-1];
1810   }
1811   b->rstart = b->rowners[b->rank];
1812   b->rend   = b->rowners[b->rank+1];
1813   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1814   b->cowners[0] = 0;
1815   for (i=2; i<=b->size; i++) {
1816     b->cowners[i] += b->cowners[i-1];
1817   }
1818   b->cstart = b->cowners[b->rank];
1819   b->cend   = b->cowners[b->rank+1];
1820 
1821   /* build cache for off array entries formed */
1822   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1823   b->donotstash  = PETSC_FALSE;
1824   b->colmap      = 0;
1825   b->garray      = 0;
1826   b->roworiented = PETSC_TRUE;
1827 
1828   /* stuff used for matrix vector multiply */
1829   b->lvec      = PETSC_NULL;
1830   b->Mvctx     = PETSC_NULL;
1831 
1832   /* stuff for MatGetRow() */
1833   b->rowindices   = 0;
1834   b->rowvalues    = 0;
1835   b->getrowactive = PETSC_FALSE;
1836 
1837   /* Explicitly create 2 MATSEQAIJ matrices. */
1838   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1839   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1840   PetscLogObjectParent(B,b->A);
1841   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1842   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1843   PetscLogObjectParent(B,b->B);
1844 
1845   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1846                                      "MatStoreValues_MPIAIJ",
1847                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1848   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1849                                      "MatRetrieveValues_MPIAIJ",
1850                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1851   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1852                                      "MatGetDiagonalBlock_MPIAIJ",
1853                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1855                                      "MatIsTranspose_MPIAIJ",
1856                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1857   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1858                                      "MatMPIAIJSetPreallocation_MPIAIJ",
1859                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1860   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1861                                      "MatDiagonalScaleLocal_MPIAIJ",
1862                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1863   PetscFunctionReturn(0);
1864 }
1865 EXTERN_C_END
1866 
1867 /*MC
1868    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1869 
1870    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1871    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1872    MatSeqAIJSetPreallocation() is supported for single-process communicators, and similarly
1873    MatMPIAIJSetPreallocation() is supported for communicators controlling multiple processes.
1874    It is recommended that you call both preallocation routines for simplicity, as sketched below.
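
   A sketch of the recommended pattern (m,n,M,N and the per-row estimates d_nz,o_nz
   are placeholders; only the preallocation routine matching the actual type takes effect):
.vb
     Mat A;
     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,d_nz,PETSC_NULL);                  /* used on one process */
     MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);  /* used on several processes */
.ve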
1875 
1876    Options Database Keys:
1877 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1878 
1879   Level: beginner
1880 
1881 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1882 M*/
1883 
1884 EXTERN_C_BEGIN
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatCreate_AIJ"
1887 PetscErrorCode MatCreate_AIJ(Mat A)
1888 {
1889   PetscErrorCode ierr;
1890   int size;
1891 
1892   PetscFunctionBegin;
1893   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1894   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1895   if (size == 1) {
1896     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1897   } else {
1898     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1899   }
1900   PetscFunctionReturn(0);
1901 }
1902 EXTERN_C_END
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1906 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1907 {
1908   Mat        mat;
1909   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1910   PetscErrorCode ierr;
1911 
1912   PetscFunctionBegin;
1913   *newmat       = 0;
1914   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1915   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1916   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1917   a    = (Mat_MPIAIJ*)mat->data;
1918 
1919   mat->factor       = matin->factor;
1920   mat->assembled    = PETSC_TRUE;
1921   mat->insertmode   = NOT_SET_VALUES;
1922   mat->preallocated = PETSC_TRUE;
1923 
1924   a->rstart       = oldmat->rstart;
1925   a->rend         = oldmat->rend;
1926   a->cstart       = oldmat->cstart;
1927   a->cend         = oldmat->cend;
1928   a->size         = oldmat->size;
1929   a->rank         = oldmat->rank;
1930   a->donotstash   = oldmat->donotstash;
1931   a->roworiented  = oldmat->roworiented;
1932   a->rowindices   = 0;
1933   a->rowvalues    = 0;
1934   a->getrowactive = PETSC_FALSE;
1935 
1936   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1937   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1938   if (oldmat->colmap) {
1939 #if defined (PETSC_USE_CTABLE)
1940     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1941 #else
1942     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1943     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1944     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1945 #endif
1946   } else a->colmap = 0;
1947   if (oldmat->garray) {
1948     int len;
1949     len  = oldmat->B->n;
1950     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1951     PetscLogObjectMemory(mat,len*sizeof(int));
1952     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1953   } else a->garray = 0;
1954 
1955   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1956   PetscLogObjectParent(mat,a->lvec);
1957   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1958   PetscLogObjectParent(mat,a->Mvctx);
1959   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1960   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1961   PetscLogObjectParent(mat,a->A);
1962   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1963   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1964   PetscLogObjectParent(mat,a->B);
1965   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1966   *newmat = mat;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #include "petscsys.h"
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatLoad_MPIAIJ"
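/*
   Overview of the algorithm below: process 0 reads the binary header and all
   row lengths, scattering each process its own lengths; it then reads the
   column indices and values in per-process chunks, keeping its own chunk and
   shipping the others with MPI_Send. Every process counts its diagonal and
   off-diagonal nonzeros from the column indices, preallocates accordingly,
   and inserts its rows with MatSetValues().
*/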
1974 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1975 {
1976   Mat            A;
1977   PetscScalar    *vals,*svals;
1978   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1979   MPI_Status     status;
1980   PetscErrorCode ierr;
1981   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1982   int            i,nz,j,rstart,rend,fd;
1983   int            header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1984   int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1985   int            cend,cstart,n;
1986 
1987   PetscFunctionBegin;
1988   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1989   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1990   if (!rank) {
1991     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1992     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1993     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1994     if (header[3] < 0) {
1995       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1996     }
1997   }
1998 
1999   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2000   M = header[1]; N = header[2];
2001   /* determine ownership of all rows */
2002   m = M/size + ((M % size) > rank);
2003   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2004   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2005   rowners[0] = 0;
2006   for (i=2; i<=size; i++) {
2007     rowners[i] += rowners[i-1];
2008   }
2009   rstart = rowners[rank];
2010   rend   = rowners[rank+1];
2011 
2012   /* distribute row lengths to all processors */
2013   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
2014   offlens = ourlens + (rend-rstart);
2015   if (!rank) {
2016     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
2017     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2018     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2019     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2020     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2021     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2022   } else {
2023     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2024   }
2025 
2026   if (!rank) {
2027     /* calculate the number of nonzeros on each processor */
2028     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2029     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2030     for (i=0; i<size; i++) {
2031       for (j=rowners[i]; j< rowners[i+1]; j++) {
2032         procsnz[i] += rowlengths[j];
2033       }
2034     }
2035     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2036 
2037     /* determine max buffer needed and allocate it */
2038     maxnz = 0;
2039     for (i=0; i<size; i++) {
2040       maxnz = PetscMax(maxnz,procsnz[i]);
2041     }
2042     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2043 
2044     /* read in my part of the matrix column indices  */
2045     nz   = procsnz[0];
2046     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2047     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2048 
2049     /* read in everyone else's and ship it off */
2050     for (i=1; i<size; i++) {
2051       nz   = procsnz[i];
2052       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2053       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2054     }
2055     ierr = PetscFree(cols);CHKERRQ(ierr);
2056   } else {
2057     /* determine buffer space needed for message */
2058     nz = 0;
2059     for (i=0; i<m; i++) {
2060       nz += ourlens[i];
2061     }
2062     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2063 
2064     /* receive message of column indices*/
2065     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2066     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2067     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2068   }
2069 
2070   /* determine column ownership if matrix is not square */
2071   if (N != M) {
2072     n      = N/size + ((N % size) > rank);
2073     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2074     cstart = cend - n;
2075   } else {
2076     cstart = rstart;
2077     cend   = rend;
2078     n      = cend - cstart;
2079   }
2080 
2081   /* loop over local rows, determining number of off diagonal entries */
2082   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2083   jj = 0;
2084   for (i=0; i<m; i++) {
2085     for (j=0; j<ourlens[i]; j++) {
2086       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2087       jj++;
2088     }
2089   }
2090 
2091   /* create our matrix */
2092   for (i=0; i<m; i++) {
2093     ourlens[i] -= offlens[i];
2094   }
2095   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2096   ierr = MatSetType(A,type);CHKERRQ(ierr);
2097   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2098 
2099   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2100   for (i=0; i<m; i++) {
2101     ourlens[i] += offlens[i];
2102   }
2103 
2104   if (!rank) {
2105     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2106 
2107     /* read in my part of the matrix numerical values  */
2108     nz   = procsnz[0];
2109     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2110 
2111     /* insert into matrix */
2112     jj      = rstart;
2113     smycols = mycols;
2114     svals   = vals;
2115     for (i=0; i<m; i++) {
2116       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2117       smycols += ourlens[i];
2118       svals   += ourlens[i];
2119       jj++;
2120     }
2121 
2122     /* read in other processors and ship out */
2123     for (i=1; i<size; i++) {
2124       nz   = procsnz[i];
2125       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2126       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2127     }
2128     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2129   } else {
2130     /* receive numeric values */
2131     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2132 
2133     /* receive message of values*/
2134     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2135     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2136     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2137 
2138     /* insert into matrix */
2139     jj      = rstart;
2140     smycols = mycols;
2141     svals   = vals;
2142     for (i=0; i<m; i++) {
2143       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2144       smycols += ourlens[i];
2145       svals   += ourlens[i];
2146       jj++;
2147     }
2148   }
2149   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2150   ierr = PetscFree(vals);CHKERRQ(ierr);
2151   ierr = PetscFree(mycols);CHKERRQ(ierr);
2152   ierr = PetscFree(rowners);CHKERRQ(ierr);
2153 
2154   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2155   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   *newmat = A;
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2162 /*
2163     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2164   copy, and then the final result formed by concatenating the local matrices.
2165   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2166 */
2167 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2168 {
2169   PetscErrorCode ierr;
2170   PetscMPIInt    rank,size;
2171   int            i,m,n,rstart,row,rend,nz,*cwork,j;
2172   int            *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2173   Mat            *local,M,Mreuse;
2174   PetscScalar    *vwork,*aa;
2175   MPI_Comm       comm = mat->comm;
2176   Mat_SeqAIJ     *aij;
2177 
2178 
2179   PetscFunctionBegin;
2180   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2182 
2183   if (call ==  MAT_REUSE_MATRIX) {
2184     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2185     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2186     local = &Mreuse;
2187     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2188   } else {
2189     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2190     Mreuse = *local;
2191     ierr   = PetscFree(local);CHKERRQ(ierr);
2192   }
2193 
2194   /*
2195       m - number of local rows
2196       n - number of columns (same on all processors)
2197       rstart - first row in new global matrix generated
2198   */
2199   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2200   if (call == MAT_INITIAL_MATRIX) {
2201     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2202     ii  = aij->i;
2203     jj  = aij->j;
2204 
2205     /*
2206         Determine the number of non-zeros in the diagonal and off-diagonal
2207         portions of the matrix in order to do correct preallocation
2208     */
2209 
2210     /* first get start and end of "diagonal" columns */
2211     if (csize == PETSC_DECIDE) {
2212       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2213       if (mglobal == n) { /* square matrix */
2214         nlocal = m;
2215       } else {
2216         nlocal = n/size + ((n % size) > rank);
2217       }
2218     } else {
2219       nlocal = csize;
2220     }
2221     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2222     rstart = rend - nlocal;
2223     if (rank == size - 1 && rend != n) {
2224       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2225     }
2226 
2227     /* next, compute all the lengths */
2228     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2229     olens = dlens + m;
2230     for (i=0; i<m; i++) {
2231       jend = ii[i+1] - ii[i];
2232       olen = 0;
2233       dlen = 0;
2234       for (j=0; j<jend; j++) {
2235         if (*jj < rstart || *jj >= rend) olen++;
2236         else dlen++;
2237         jj++;
2238       }
2239       olens[i] = olen;
2240       dlens[i] = dlen;
2241     }
2242     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2243     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2244     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2245     ierr = PetscFree(dlens);CHKERRQ(ierr);
2246   } else {
2247     int ml,nl;
2248 
2249     M = *newmat;
2250     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2251     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2252     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2253     /*
2254          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2255        rather than the slower MatSetValues().
2256     */
2257     M->was_assembled = PETSC_TRUE;
2258     M->assembled     = PETSC_FALSE;
2259   }
2260   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2261   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2262   ii  = aij->i;
2263   jj  = aij->j;
2264   aa  = aij->a;
2265   for (i=0; i<m; i++) {
2266     row   = rstart + i;
2267     nz    = ii[i+1] - ii[i];
2268     cwork = jj;     jj += nz;
2269     vwork = aa;     aa += nz;
2270     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2271   }
2272 
2273   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2275   *newmat = M;
2276 
2277   /* save submatrix used in processor for next request */
2278   if (call ==  MAT_INITIAL_MATRIX) {
2279     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2280     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2281   }
2282 
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2288 /*@C
2289    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2290    (the default parallel PETSc format).  For good matrix assembly performance
2291    the user should preallocate the matrix storage by setting the parameters
2292    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2293    performance can be increased by more than a factor of 50.
2294 
2295    Collective on MPI_Comm
2296 
2297    Input Parameters:
2298 +  A - the matrix
2299 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2300            (same value is used for all local rows)
2301 .  d_nnz - array containing the number of nonzeros in the various rows of the
2302            DIAGONAL portion of the local submatrix (possibly different for each row)
2303            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2304            The size of this array is equal to the number of local rows, i.e., 'm'.
2305            You must leave room for the diagonal entry even if it is zero.
2306 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2307            submatrix (same value is used for all local rows).
2308 -  o_nnz - array containing the number of nonzeros in the various rows of the
2309            OFF-DIAGONAL portion of the local submatrix (possibly different for
2310            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2311            structure. The size of this array is equal to the number
2312            of local rows, i.e., 'm'.
2313 
2314    The AIJ format (also called the Yale sparse matrix format or
2315    compressed row storage) is fully compatible with standard Fortran 77
2316    storage.  That is, the stored row and column indices can begin at
2317    either one (as in Fortran) or zero.  See the users manual for details.
2318 
2319    The user MUST specify either the local or global matrix dimensions
2320    (possibly both).
2321 
2322    The parallel matrix is partitioned such that the first m0 rows belong to
2323    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2324    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
2325 
2326    The DIAGONAL portion of the local submatrix of a processor can be defined
2327    as the submatrix which is obtained by extracting the part corresponding
2328    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2329    first row that belongs to the processor, and r2 is the last row belonging
2330    to this processor. This is a square mxm matrix. The remaining portion
2331    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2332 
2333    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2334 
2335    By default, this format uses inodes (identical nodes) when possible.
2336    We search for consecutive rows with the same nonzero structure, thereby
2337    reusing matrix information to achieve increased efficiency.
2338 
2339    Options Database Keys:
2340 +  -mat_aij_no_inode  - Do not use inodes
2341 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2342 -  -mat_aij_oneindex - Internally use indexing starting at 1
2343         rather than 0.  Note that when calling MatSetValues(),
2344         the user still MUST index entries starting at 0!
2345 
2346    Example usage:
2347 
2348    Consider the following 8x8 matrix with 34 non-zero values that is
2349    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2350    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2351    as follows:
2352 
2353 .vb
2354             1  2  0  |  0  3  0  |  0  4
2355     Proc0   0  5  6  |  7  0  0  |  8  0
2356             9  0 10  | 11  0  0  | 12  0
2357     -------------------------------------
2358            13  0 14  | 15 16 17  |  0  0
2359     Proc1   0 18  0  | 19 20 21  |  0  0
2360             0  0  0  | 22 23  0  | 24  0
2361     -------------------------------------
2362     Proc2  25 26 27  |  0  0 28  | 29  0
2363            30  0  0  | 31 32 33  |  0 34
2364 .ve
2365 
2366    This can be represented as a collection of submatrices as:
2367 
2368 .vb
2369       A B C
2370       D E F
2371       G H I
2372 .ve
2373 
2374    Where the submatrices A,B,C are owned by proc0, D,E,F are
2375    owned by proc1, G,H,I are owned by proc2.
2376 
2377    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2378    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2379    The 'M','N' parameters are 8,8, and have the same values on all procs.
2380 
2381    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2382    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2383    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2384    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2385    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2386    matrix, and [DF] as another SeqAIJ matrix.
2387 
2388    When d_nz, o_nz parameters are specified, d_nz storage elements are
2389    allocated for every row of the local diagonal submatrix, and o_nz
2390    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2391    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2392    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2393    In this case, the values of d_nz,o_nz are:
2394 .vb
2395      proc0 : d_nz = 2, o_nz = 2
2396      proc1 : d_nz = 3, o_nz = 2
2397      proc2 : d_nz = 1, o_nz = 4
2398 .ve
2399    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2400    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2401    for proc2; i.e., we are using 12+15+10=37 storage locations to store
2402    34 values.
2403 
2404    When d_nnz, o_nnz parameters are specified, the storage is specified
2405    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2406    In the above case the values for d_nnz,o_nnz are:
2407 .vb
2408      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2409      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2410      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2411 .ve
2412    Here the space allocated is the sum of all the above values, i.e., 34, and
2413    hence the preallocation is exact.
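
   A typical call sequence, sketched with illustrative per-row estimates (here 5
   diagonal and 2 off-diagonal nonzeros per row; error checking omitted):
.vb
     Mat A;
     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
.ve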
2414 
2415    Level: intermediate
2416 
2417 .keywords: matrix, aij, compressed row, sparse, parallel
2418 
2419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2420 @*/
2421 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2422 {
2423   PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);
2424 
2425   PetscFunctionBegin;
2426   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
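  /* if the matrix type has not composed this routine (e.g. a SeqAIJ matrix), the call is a no-op */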
2427   if (f) {
2428     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2429   }
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatCreateMPIAIJ"
2435 /*@C
2436    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2437    (the default parallel PETSc format).  For good matrix assembly performance
2438    the user should preallocate the matrix storage by setting the parameters
2439    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2440    performance can be increased by more than a factor of 50.
2441 
2442    Collective on MPI_Comm
2443 
2444    Input Parameters:
2445 +  comm - MPI communicator
2446 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
2447            This value should be the same as the local size used in creating the
2448            y vector for the matrix-vector product y = Ax.
2449 .  n - This value should be the same as the local size used in creating the
2450        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2451        calculated if N is given). For square matrices n is almost always m.
2452 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2453 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2454 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2455            (same value is used for all local rows)
2456 .  d_nnz - array containing the number of nonzeros in the various rows of the
2457            DIAGONAL portion of the local submatrix (possibly different for each row)
2458            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2459            The size of this array is equal to the number of local rows, i.e 'm'.
2460            You must leave room for the diagonal entry even if it is zero.
2461 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2462            submatrix (same value is used for all local rows).
2463 -  o_nnz - array containing the number of nonzeros in the various rows of the
2464            OFF-DIAGONAL portion of the local submatrix (possibly different for
2465            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2466            structure. The size of this array is equal to the number
2467            of local rows, i.e 'm'.
2468 
2469    Output Parameter:
2470 .  A - the matrix
2471 
2472    Notes:
2473    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2474    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2475    storage requirements for this matrix.
2476 
2477    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2478    processor then it must be used on all processors that share the object for
2479    that argument.
2480 
2481    The AIJ format (also called the Yale sparse matrix format or
2482    compressed row storage) is fully compatible with standard Fortran 77
2483    storage.  That is, the stored row and column indices can begin at
2484    either one (as in Fortran) or zero.  See the users manual for details.
2485 
2486    The user MUST specify either the local or global matrix dimensions
2487    (possibly both).
2488 
2489    The parallel matrix is partitioned such that the first m0 rows belong to
2490    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2491    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
2492 
2493    The DIAGONAL portion of the local submatrix of a processor can be defined
2494    as the submatrix which is obtained by extracting the part corresponding
2495    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2496    first row that belongs to the processor, and r2 is the last row belonging
2497    to this processor. This is a square mxm matrix. The remaining portion
2498    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2499 
2500    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2501 
2502    When calling this routine with a single process communicator, a matrix of
2503    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2504    type of communicator, use the construction mechanism:
2505      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2506 
2507    By default, this format uses inodes (identical nodes) when possible.
2508    We search for consecutive rows with the same nonzero structure, thereby
2509    reusing matrix information to achieve increased efficiency.
2510 
2511    Options Database Keys:
2512 +  -mat_aij_no_inode  - Do not use inodes
2513 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2514 -  -mat_aij_oneindex - Internally use indexing starting at 1
2515         rather than 0.  Note that when calling MatSetValues(),
2516         the user still MUST index entries starting at 0!
2517 
2518 
2519    Example usage:
2520 
2521    Consider the following 8x8 matrix with 34 non-zero values that is
2522    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2523    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2524    as follows:
2525 
2526 .vb
2527             1  2  0  |  0  3  0  |  0  4
2528     Proc0   0  5  6  |  7  0  0  |  8  0
2529             9  0 10  | 11  0  0  | 12  0
2530     -------------------------------------
2531            13  0 14  | 15 16 17  |  0  0
2532     Proc1   0 18  0  | 19 20 21  |  0  0
2533             0  0  0  | 22 23  0  | 24  0
2534     -------------------------------------
2535     Proc2  25 26 27  |  0  0 28  | 29  0
2536            30  0  0  | 31 32 33  |  0 34
2537 .ve
2538 
2539    This can be represented as a collection of submatrices as:
2540 
2541 .vb
2542       A B C
2543       D E F
2544       G H I
2545 .ve
2546 
2547    Where the submatrices A,B,C are owned by proc0, D,E,F are
2548    owned by proc1, G,H,I are owned by proc2.
2549 
2550    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2551    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2552    The 'M','N' parameters are 8,8, and have the same values on all procs.
2553 
2554    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2555    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2556    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2557    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2558    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2559    matrix, and [DF] as another SeqAIJ matrix.
2560 
2561    When d_nz, o_nz parameters are specified, d_nz storage elements are
2562    allocated for every row of the local diagonal submatrix, and o_nz
2563    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2564    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2565    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2566    In this case, the values of d_nz,o_nz are:
2567 .vb
2568      proc0 : d_nz = 2, o_nz = 2
2569      proc1 : d_nz = 3, o_nz = 2
2570      proc2 : d_nz = 1, o_nz = 4
2571 .ve
2572    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2573    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2574    for proc2; i.e., we are using 12+15+10=37 storage locations to store
2575    34 values.
2576 
2577    When d_nnz, o_nnz parameters are specified, the storage is specified
2578    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2579    In the above case the values for d_nnz,o_nnz are:
2580 .vb
2581      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2582      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2583      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2584 .ve
2585    Here the space allocated is the sum of all the above values, i.e., 34, and
2586    hence the preallocation is exact.
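
   A minimal sketch of a call (global sizes M and N are given and the local sizes
   are left to PETSc; 5 and 2 are illustrative per-row estimates):
.vb
     Mat A;
     MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve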
2587 
2588    Level: intermediate
2589 
2590 .keywords: matrix, aij, compressed row, sparse, parallel
2591 
2592 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2593 @*/
2594 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2595 {
2596   PetscErrorCode ierr;
2597   int size;
2598 
2599   PetscFunctionBegin;
2600   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2601   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2602   if (size > 1) {
2603     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2604     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2605   } else {
2606     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2607     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2608   }
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 #undef __FUNCT__
2613 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2614 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2615 {
2616   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2617   PetscFunctionBegin;
2618   *Ad     = a->A;
2619   *Ao     = a->B;
2620   *colmap = a->garray;
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 #undef __FUNCT__
2625 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2626 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2627 {
2628   PetscErrorCode ierr;
2629   int        i;
2630   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2631 
2632   PetscFunctionBegin;
2633   if (coloring->ctype == IS_COLORING_LOCAL) {
2634     ISColoringValue *allcolors,*colors;
2635     ISColoring      ocoloring;
2636 
2637     /* set coloring for diagonal portion */
2638     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2639 
2640     /* set coloring for off-diagonal portion */
2641     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2642     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2643     for (i=0; i<a->B->n; i++) {
2644       colors[i] = allcolors[a->garray[i]];
2645     }
2646     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2647     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2648     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2649     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2650   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2651     ISColoringValue *colors;
2652     int             *larray;
2653     ISColoring      ocoloring;
2654 
2655     /* set coloring for diagonal portion */
2656     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2657     for (i=0; i<a->A->n; i++) {
2658       larray[i] = i + a->cstart;
2659     }
2660     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2661     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2662     for (i=0; i<a->A->n; i++) {
2663       colors[i] = coloring->colors[larray[i]];
2664     }
2665     ierr = PetscFree(larray);CHKERRQ(ierr);
2666     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2667     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2668     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2669 
2670     /* set coloring for off-diagonal portion */
2671     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2672     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2673     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2674     for (i=0; i<a->B->n; i++) {
2675       colors[i] = coloring->colors[larray[i]];
2676     }
2677     ierr = PetscFree(larray);CHKERRQ(ierr);
2678     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2679     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2680     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2681   } else {
2682     SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2683   }
2684 
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 #undef __FUNCT__
2689 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2690 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2693   PetscErrorCode ierr;
2694 
2695   PetscFunctionBegin;
2696   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2697   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 #undef __FUNCT__
2702 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2703 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2704 {
2705   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2706   PetscErrorCode ierr;
2707 
2708   PetscFunctionBegin;
2709   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2710   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 #undef __FUNCT__
2715 #define __FUNCT__ "MatMerge"
2716 /*@C
2717       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2718                  matrices from each processor
2719 
2720     Collective on MPI_Comm
2721 
2722    Input Parameters:
2723 +    comm - the communicator the parallel matrix will live on
2724 .    inmat - the input sequential matrix, one per process
2725 .    n - number of local columns (or PETSC_DECIDE)
2726 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2727 
2728    Output Parameter:
2729 .    outmat - the parallel matrix generated
2730 
2731     Level: advanced
2732 
2733    Notes: The number of columns of the matrix on EACH processor MUST be the same; the input matrix inmat is destroyed by this routine.
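
   A sketch of typical usage (each process first assembles its own sequential matrix):
.vb
     Mat seqmat,parmat;
     /* ... each process builds its local rows into seqmat (a SeqAIJ matrix) ... */
     MatMerge(comm,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parmat);
     /* seqmat has been destroyed inside MatMerge() */
.ve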
2734 
2735 @*/
2736 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2737 {
2738   PetscErrorCode ierr;
2739   int               m,N,i,rstart,nnz,I,*dnz,*onz;
2740   const int         *indx;
2741   const PetscScalar *values;
2742   PetscMap          columnmap,rowmap;
2743 
2744   PetscFunctionBegin;
2745 
2746   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2747   if (scall == MAT_INITIAL_MATRIX){
2748     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2749     if (n == PETSC_DECIDE){
2750       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2751       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2752       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2753       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2754       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2755     }
2756 
2757     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2758     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2759     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2760     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2761     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2762 
2763     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2764     for (i=0;i<m;i++) {
2765       ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2766       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2767       ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2768     }
2769     /* This routine will ONLY return an MPIAIJ type matrix */
2770     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2771     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2772     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2773     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2774 
2775   } else if (scall == MAT_REUSE_MATRIX){
2776     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2777   } else {
2778     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2779   }
2780 
2781   for (i=0;i<m;i++) {
2782     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2783     I    = i + rstart;
2784     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2785     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2786   }
2787   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2788   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2789   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2790 
2791   PetscFunctionReturn(0);
2792 }
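
/*
   A minimal usage sketch for MatMerge() (an illustrative comment, not
   compiled code; the sizes and values below are made up). Each process
   builds a 2 by 4 SeqAIJ piece with the SAME number of columns, and the
   pieces are stacked row-wise into one parallel MPIAIJ matrix:

      Mat         seq,par;
      int         i,cols[2] = {0,2};
      PetscScalar vals[2] = {1.0,-1.0};

      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,4,2,PETSC_NULL,&seq);CHKERRQ(ierr);
      for (i=0; i<2; i++) {
        ierr = MatSetValues(seq,1,&i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
      ierr = MatDestroy(par);CHKERRQ(ierr);

   Note that seq is destroyed by MatMerge() itself, so only par is
   destroyed afterwards.
*/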
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatFileSplit"
2796 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2797 {
2798   PetscErrorCode    ierr;
2799   PetscMPIInt       rank;
2800   int               m,N,i,rstart,nnz;
2801   size_t            len;
2802   const int         *indx;
2803   PetscViewer       out;
2804   char              *name;
2805   Mat               B;
2806   const PetscScalar *values;
2807 
2808   PetscFunctionBegin;
2809 
2810   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2811   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2812   /* Should this be the type of the diagonal block of A? */
2813   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2814   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2815   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2816   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2817   for (i=0;i<m;i++) {
2818     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2819     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2820     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2821   }
2822   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2823   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2824 
2825   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2826   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2827   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2828   sprintf(name,"%s.%d",outfile,rank);
2829   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2830   ierr = PetscFree(name);CHKERRQ(ierr);
2831   ierr = MatView(B,out);CHKERRQ(ierr);
2832   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2833   ierr = MatDestroy(B);CHKERRQ(ierr);
2834   PetscFunctionReturn(0);
2835 }
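
/*
   A small usage sketch for MatFileSplit() (an illustrative comment; the
   file basename is made up). Each rank writes its own rows of the
   parallel matrix A to a separate binary file named "<outfile>.<rank>":

      ierr = MatFileSplit(A,"mymat");CHKERRQ(ierr);

   On 2 processes this produces mymat.0 and mymat.1, each of which can be
   read back on a single process with the binary viewer and MatLoad().
*/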
2836 
2837 typedef struct { /* used by MatMerge_SeqsToMPI for reusing the merged matrix */
2838   PetscMap  rowmap;
2839   int       nrecv,nsend,*id_r,*bi,*bj,**ijbuf_r;
2840   int       *len_sra; /* array of length 2*size; len_sra[i] = length of the values sent to proc i,
2841                          len_sra[size+k] = length of the k-th incoming values message */
2842   MatScalar *abuf_s;
2843 } Mat_Merge_SeqsToMPI;
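
/* This struct is attached to the merged matrix as A->spptr by
   MatMerge_SeqsToMPI() on a MAT_INITIAL_MATRIX call and freed by
   MatDestroy_MPIAIJ_SeqsToMPI() below; a MAT_REUSE_MATRIX call recovers
   bi, bj and ijbuf_r from it instead of redoing the symbolic phase. */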
2844 
2845 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2846 #undef __FUNCT__
2847 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2848 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2849 {
2850   PetscErrorCode ierr;
2851   Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr;
2852 
2853   PetscFunctionBegin;
2854   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2855   ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr);
2856   ierr = PetscFree(merge->len_sra);CHKERRQ(ierr);
2857   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2858   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2859   ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr);
2860   ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2861   ierr = PetscFree(merge);CHKERRQ(ierr);
2862 
2863   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 #include "src/mat/utils/freespace.h"
2868 #include "petscbt.h"
2869 #undef __FUNCT__
2870 #define __FUNCT__ "MatMerge_SeqsToMPI"
2871 /*@C
2872       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the
2873                  sequential matrices from each processor
2874 
2875     Collective on MPI_Comm
2876 
2877    Input Parameters:
2878 +    comm - the communicator the parallel matrix will live on
2879 .    seqmat - the input sequential matrix (one per process)
2880 .    m - number of local rows (or PETSC_DECIDE)
2881 .    n - number of local columns (or PETSC_DECIDE)
2882 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2883 
2884    Output Parameter:
2885 .    mpimat - the parallel matrix generated
2886 
2887     Level: advanced
2888 
2889    Notes: The dimensions of the sequential matrix on each processor MUST be the same;
2890       the parallel result is the entrywise sum of them all. A usage sketch (as a comment) follows the function body.
2891 
2892 @*/
2893 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
2894 {
2895   PetscErrorCode    ierr;
2896   Mat               B_seq,B_mpi;
2897   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)seqmat->data;
2898   PetscMPIInt       size,rank;
2899   int               M=seqmat->m,N=seqmat->n,i,j,*owners,*ai=a->i,*aj=a->j,tag,taga,len,len_a = 0,*len_s,*len_sa,proc,*len_r,*len_ra;
2900   int               **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj;
2901   MPI_Request       *s_waits,*r_waits,*s_waitsa,*r_waitsa;
2902   MPI_Status        *status;
2903   MatScalar         *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i;
2904   FreeSpaceList     free_space=PETSC_NULL,current_space=PETSC_NULL;
2905   PetscBT           lnkbt;
2906   Mat_Merge_SeqsToMPI *merge;
2907 
2908   PetscFunctionBegin;
2909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2910   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2911   if (scall == MAT_INITIAL_MATRIX){
2912     ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
2913   } else if (scall == MAT_REUSE_MATRIX){
2914     B_mpi = *mpimat;
2915     merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr;
2916     bi      = merge->bi;
2917     bj      = merge->bj;
2918     ijbuf_r = merge->ijbuf_r;
2919   } else {
2920     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2921   }
2922   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2923 
2924   /* determine the number of messages to send, their lengths */
2925   /*---------------------------------------------------------*/
2926   if (scall == MAT_INITIAL_MATRIX){
2927     ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
2928     if (m == PETSC_DECIDE) {
2929       ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
2930     } else {
2931       ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
2932     }
2933     ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
2934     ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr);
2935     ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr);
2936   }
2937   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);}
2938   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2939 
2940   len_sa  = merge->len_sra;
2941   len_ra = merge->len_sra + size;
2942 
2943   if (scall == MAT_INITIAL_MATRIX){
2944     merge->nsend = 0;
2945     len = len_a = 0;
2946     for (proc=0; proc<size; proc++){
2947       len_s[proc] = len_sa[proc] = 0;
2948       if (proc == rank) continue;
2949       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2950         len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */
2951       }
2952       if (len_sa[proc]) {
2953         merge->nsend++;
2954         len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc];
2955         if (len < len_s[proc]) len = len_s[proc];
2956         if (len_a < len_sa[proc]) len_a = len_sa[proc];
2957       }
2958     }
2959 
2960     /* determine the number and length of messages to receive */
2961     /*--------------------------------------------------------*/
2962     ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
2963     ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr);
2964     /*
2965     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] nsend: %d, nrecv: %d\n",rank,merge->nsend,merge->nrecv);
2966     for (i=0; i<merge->nrecv; i++){
2967       ierr = PetscPrintf(PETSC_COMM_SELF," [%d] expects recv len_r=%d from [%d]\n",rank,len_r[i],merge->id_r[i]);
2968     }
2969     */
2970 
2971     /* post the Irecvs corresponding to these messages */
2972     /*-------------------------------------------------*/
2973     /* get new tag to keep the communication clean */
2974     ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr);
2975     ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr);
2976 
2977     /* post the sends */
2978     /*----------------*/
2979     ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2980     ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr);
2981     k = 0;
2982     for (proc=0; proc<size; proc++){
2983       if (!len_s[proc]) continue;
2984       /* form outgoing ij data to be sent to [proc] */
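      /* Layout of the message for [proc] (cf. len_s[proc] = len_sa[proc] +
         owners[proc+1]-owners[proc] above): the first owners[proc+1]-owners[proc]
         entries hold the running (cumulative) nonzero counts of the rows owned
         by [proc], followed by the len_sa[proc] column indices of those rows. */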
2985       nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc];
2986       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2987         anzi = ai[i+1] - ai[i];
2988         if (i==owners[proc]){
2989           ijbuf_s[i-owners[proc]] = anzi;
2990         } else {
2991           ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi;
2992         }
2993         for (j=0; j<anzi; j++){
2994           *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++;
2995         }
2996       }
2997       ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr);
2998       k++;
2999     }
3000 
3001     /* receives and sends of ij-structure are complete, deallocate memories */
3002     /*----------------------------------------------------------------------*/
3003     ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
3004     ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
3005 
3006     ierr = PetscFree(ijbuf_s);CHKERRQ(ierr);
3007     ierr = PetscFree(s_waits);CHKERRQ(ierr);
3008     ierr = PetscFree(r_waits);CHKERRQ(ierr);
3009   }
3010 
3011   /* send and recv matrix values */
3012   /*-----------------------------*/
3013   if (scall == MAT_INITIAL_MATRIX){
3014     ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr);
3015     for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* number of matrix values in the i-th incoming message */
3016   }
3017   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
3018   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr);
3019 
3020   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr);
3021   k = 0;
3022   for (proc=0; proc<size; proc++){
3023     if (!len_sa[proc]) continue;
3024     /* form outgoing mat value data to be sent to [proc] */
3025     abuf_i = merge->abuf_s;
3026     for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
3027       anzi = ai[i+1] - ai[i];
3028       for (j=0; j<anzi; j++){
3029         *abuf_i = *(aa+ai[i]+j); abuf_i++;
3030       }
3031     }
3032     ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr);
3033     k++;
3034   }
3035 
3036   ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr);
3037   ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr);
3038   ierr = PetscFree(status);CHKERRQ(ierr);
3039 
3040   ierr = PetscFree(s_waitsa);CHKERRQ(ierr);
3041   ierr = PetscFree(r_waitsa);CHKERRQ(ierr);
3042 
3043   /* create seq matrix B_seq in each processor */
3044   /*-------------------------------------------*/
3045   if (scall == MAT_INITIAL_MATRIX){
3046     ierr = PetscFree(len_s);CHKERRQ(ierr);
3047     /* allocate bi array and free space for accumulating nonzero column info */
3048     ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr);
3049     bi[0] = 0;
3050 
3051     /* create and initialize a linked list */
3052     nlnk = N+1;
3053     ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3054 
3055     /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */
3056     len = 0;
3057     for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
3058     ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
3059     current_space = free_space;
3060 
3061     /* determine symbolic info for each row of B_seq */
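    /* For each local row, lnk (with the bit table lnkbt for fast membership
       tests) accumulates, in sorted order, the union of this process's own
       column indices and those received from the other processes;
       PetscLLClean() then flushes the sorted indices into the free-space
       buffer and resets the list for the next row. */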
3062     for (i=0;i<m;i++) {
3063       bnzi   = 0;
3064       /* add local non-zero cols of this proc's seqmat into lnk */
3065       arow   = owners[rank] + i;
3066       anzi   = ai[arow+1] - ai[arow];
3067       aj     = a->j + ai[arow];
3068       ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3069       bnzi += nlnk;
3070       /* add received col data into lnk */
3071       for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3072         /* i-th row */
3073         if (i == 0){
3074           anzi = *(ijbuf_r[k]+i);
3075           aj   = ijbuf_r[k] + m;
3076         } else {
3077           anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3078           aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3079         }
3080         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3081         bnzi += nlnk;
3082       }
3083 
3084       /* if free space is not available, make more free space */
3085       if (current_space->local_remaining<bnzi) {
3086         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3087         nspacedouble++;
3088       }
3089       /* copy data into free space, then initialize lnk */
3090       ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3091       current_space->array           += bnzi;
3092       current_space->local_used      += bnzi;
3093       current_space->local_remaining -= bnzi;
3094 
3095       bi[i+1] = bi[i] + bnzi;
3096     }
3097     ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
3098     ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3099 
3100     ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3101     ierr = PetscFree(len_r);CHKERRQ(ierr);
3102 
3103     /* allocate space for ba */
3104     ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3105     ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3106 
3107     /* put together B_seq */
3108     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
3109     ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */
3110 
3111     /* create symbolic parallel matrix B_mpi by concatenating B_seq */
3112     /*--------------------------------------------------------------*/
3113     ierr = MatMerge(comm,B_seq,n,scall,&B_mpi);CHKERRQ(ierr);
3114   } /* end of if (scall == MAT_INITIAL_MATRIX) */
3115 
3116   /* insert mat values of B_mpi */
3117   /*----------------------------*/
3118   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3119 
3120   /* set values of ba */
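  /* The columns bj_i of each row are the sorted union of all contributions,
     so every (sorted) source row aj[] appears as a subsequence of bj_i; the
     j/nextaj scans below walk the two index lists in step, adding each source
     value into the matching slot of ba_i and thereby summing duplicates. */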
3121   for (i=0; i<m; i++) {
3122     arow = owners[rank] + i;
3123     bj_i = bj+bi[i];  /* col indices of the i-th row of B_seq */
3124     bnzi = bi[i+1] - bi[i];
3125     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3126 
3127     /* add local non-zero vals of this proc's seqmat into ba */
3128     anzi = ai[arow+1] - ai[arow];
3129     aj   = a->j + ai[arow];
3130     aa   = a->a + ai[arow];
3131     nextaj = 0;
3132     for (j=0; nextaj<anzi; j++){
3133       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3134         ba_i[j] += aa[nextaj++];
3135       }
3136     }
3137 
3138     /* add received vals into ba */
3139     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3140       /* i-th row */
3141       if (i == 0){
3142         anzi = *(ijbuf_r[k]+i);
3143         aj   = ijbuf_r[k] + m;
3144         aa   = abuf_r[k];
3145       } else {
3146         anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3147         aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3148         aa   = abuf_r[k] + *(ijbuf_r[k]+i-1);
3149       }
3150       nextaj = 0;
3151       for (j=0; nextaj<anzi; j++){
3152         if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3153           ba_i[j] += aa[nextaj++];
3154         }
3155       }
3156     }
3157 
3158     ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3159   }
3160   ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161   ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162 
3163   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3164   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3165 
3166   /* attach the supporting struct to B_mpi for reuse */
3167   /*-------------------------------------------------*/
3168   if (scall == MAT_INITIAL_MATRIX){
3169     B_mpi->spptr         = (void*)merge;
3170     B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3171     merge->bi            = bi;
3172     merge->bj            = bj;
3173     merge->ijbuf_r       = ijbuf_r;
3174 
3175     *mpimat = B_mpi;
3176   }
3177   PetscFunctionReturn(0);
3178 }
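
/*
   A minimal usage sketch for MatMerge_SeqsToMPI() (an illustrative
   comment, not compiled code). Every process supplies a SeqAIJ matrix of
   the SAME global dimensions and the parallel result is their entrywise
   sum; the MAT_REUSE_MATRIX call assumes the nonzero pattern of seqmat
   is unchanged since the MAT_INITIAL_MATRIX call:

      Mat seqmat,mpimat;
      ... assemble seqmat with identical dimensions on every process ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                                MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... change the numerical values of seqmat, keeping its pattern ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                                MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(mpimat);CHKERRQ(ierr);
*/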
3179