xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2f0fba453cb3bb8484830f26eee9e052e271c2c8)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
11 has an order N integer array but is fast to acess.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = a->ignorezeroentries;
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col --;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled, if so we must
346      also disassemble ourselfs, in order that we may reassemble. */
347   /*
348      if nonzero structure of submatrix B cannot change then we know that
349      no processor disassembled thus we can skip this stuff
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = ((PetscObject)A)->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal we use separate
502      code so that MatSetValues() is not called for each diagonal allocating
503      new memory, thus calling lots of mallocs and slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added in yy until the next line, */
594     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
600     /* values actually were received in the Begin() but we need to call this nop */
601     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* )th processor needs space a largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
903       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
904       PetscFunctionReturn(0);
905     } else if (format == PETSC_VIEWER_ASCII_INFO) {
906       PetscInt   inodecount,inodelimit,*inodes;
907       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
908       if (inodes) {
909         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
910       } else {
911         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
912       }
913       PetscFunctionReturn(0);
914     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915       PetscFunctionReturn(0);
916     }
917   } else if (isbinary) {
918     if (size == 1) {
919       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
920       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
921     } else {
922       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
923     }
924     PetscFunctionReturn(0);
925   } else if (isdraw) {
926     PetscDraw  draw;
927     PetscTruth isnull;
928     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
929     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
930   }
931 
932   if (size == 1) {
933     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
934     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935   } else {
936     /* assemble the entire matrix onto first processor. */
937     Mat         A;
938     Mat_SeqAIJ  *Aloc;
939     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940     PetscScalar *a;
941 
942     if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 512 rows, use binary format instead");
943 
944     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
945     if (!rank) {
946       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
947     } else {
948       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
949     }
950     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
951     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
952     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
953     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
954 
955     /* copy over the A part */
956     Aloc = (Mat_SeqAIJ*)aij->A->data;
957     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
958     row = mat->rmap.rstart;
959     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
960     for (i=0; i<m; i++) {
961       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
962       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
963     }
964     aj = Aloc->j;
965     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
966 
967     /* copy over the B part */
968     Aloc = (Mat_SeqAIJ*)aij->B->data;
969     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
970     row  = mat->rmap.rstart;
971     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
972     ct   = cols;
973     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
974     for (i=0; i<m; i++) {
975       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
976       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
977     }
978     ierr = PetscFree(ct);CHKERRQ(ierr);
979     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
980     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981     /*
982        Everyone has to call to draw the matrix since the graphics waits are
983        synchronized across all processors that share the PetscDraw object
984     */
985     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
986     if (!rank) {
987       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
988       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
989     }
990     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
991     ierr = MatDestroy(A);CHKERRQ(ierr);
992   }
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatView_MPIAIJ"
998 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
999 {
1000   PetscErrorCode ierr;
1001   PetscTruth     iascii,isdraw,issocket,isbinary;
1002 
1003   PetscFunctionBegin;
1004   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1005   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1006   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1007   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1008   if (iascii || isdraw || isbinary || issocket) {
1009     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1010   } else {
1011     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1012   }
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MatRelax_MPIAIJ"
1018 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1019 {
1020   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1021   PetscErrorCode ierr;
1022   Vec            bb1;
1023 
1024   PetscFunctionBegin;
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1043     }
1044   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1045     if (flag & SOR_ZERO_INITIAL_GUESS) {
1046       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1047       its--;
1048     }
1049     while (its--) {
1050       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1051       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052 
1053       /* update rhs: bb1 = bb - B*x */
1054       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1055       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1056 
1057       /* local sweep */
1058       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1059     }
1060   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1061     if (flag & SOR_ZERO_INITIAL_GUESS) {
1062       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1063       its--;
1064     }
1065     while (its--) {
1066       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1067       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1068 
1069       /* update rhs: bb1 = bb - B*x */
1070       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1071       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1072 
1073       /* local sweep */
1074       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1075     }
1076   } else {
1077     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1078   }
1079 
1080   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatPermute_MPIAIJ"
1086 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1087 {
1088   MPI_Comm       comm,pcomm;
1089   PetscInt       first,local_size,nrows,*rows;
1090   int            ntids;
1091   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1096   /* make a collective version of 'rowp' */
1097   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1098   if (pcomm==comm) {
1099     crowp = rowp;
1100   } else {
1101     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1102     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1103     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1104     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1105   }
1106   /* collect the global row permutation and invert it */
1107   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1108   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1109   if (pcomm!=comm) {
1110     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1111   }
1112   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1113   /* get the local target indices */
1114   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1115   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1116   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1117   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1118   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1119   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1120   /* the column permutation is so much easier;
1121      make a local version of 'colp' and invert it */
1122   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1123   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1124   if (ntids==1) {
1125     lcolp = colp;
1126   } else {
1127     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1128     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1129     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1130   }
1131   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1132   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1133   if (ntids>1) {
1134     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1135     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1136   }
1137   /* now we just get the submatrix */
1138   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1139   /* clean up */
1140   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1141   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1147 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1148 {
1149   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1150   Mat            A = mat->A,B = mat->B;
1151   PetscErrorCode ierr;
1152   PetscReal      isend[5],irecv[5];
1153 
1154   PetscFunctionBegin;
1155   info->block_size     = 1.0;
1156   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1157   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1158   isend[3] = info->memory;  isend[4] = info->mallocs;
1159   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1161   isend[3] += info->memory;  isend[4] += info->mallocs;
1162   if (flag == MAT_LOCAL) {
1163     info->nz_used      = isend[0];
1164     info->nz_allocated = isend[1];
1165     info->nz_unneeded  = isend[2];
1166     info->memory       = isend[3];
1167     info->mallocs      = isend[4];
1168   } else if (flag == MAT_GLOBAL_MAX) {
1169     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1170     info->nz_used      = irecv[0];
1171     info->nz_allocated = irecv[1];
1172     info->nz_unneeded  = irecv[2];
1173     info->memory       = irecv[3];
1174     info->mallocs      = irecv[4];
1175   } else if (flag == MAT_GLOBAL_SUM) {
1176     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1177     info->nz_used      = irecv[0];
1178     info->nz_allocated = irecv[1];
1179     info->nz_unneeded  = irecv[2];
1180     info->memory       = irecv[3];
1181     info->mallocs      = irecv[4];
1182   }
1183   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1184   info->fill_ratio_needed = 0;
1185   info->factor_mallocs    = 0;
1186   info->rows_global       = (double)matin->rmap.N;
1187   info->columns_global    = (double)matin->cmap.N;
1188   info->rows_local        = (double)matin->rmap.n;
1189   info->columns_local     = (double)matin->cmap.N;
1190 
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatSetOption_MPIAIJ"
1196 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1197 {
1198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   switch (op) {
1203   case MAT_NEW_NONZERO_LOCATIONS:
1204   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1205   case MAT_KEEP_ZEROED_ROWS:
1206   case MAT_NEW_NONZERO_LOCATION_ERR:
1207   case MAT_USE_INODES:
1208   case MAT_IGNORE_ZERO_ENTRIES:
1209     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1210     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1211     break;
1212   case MAT_ROW_ORIENTED:
1213     a->roworiented = flg;
1214     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1215     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1216     break;
1217   case MAT_NEW_DIAGONALS:
1218     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1219     break;
1220   case MAT_IGNORE_OFF_PROC_ENTRIES:
1221     a->donotstash = PETSC_TRUE;
1222     break;
1223   case MAT_SYMMETRIC:
1224     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1225     break;
1226   case MAT_STRUCTURALLY_SYMMETRIC:
1227   case MAT_HERMITIAN:
1228   case MAT_SYMMETRY_ETERNAL:
1229     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1230     break;
1231   default:
1232     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1233   }
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatGetRow_MPIAIJ"
1239 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1240 {
1241   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1242   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1243   PetscErrorCode ierr;
1244   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1245   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1246   PetscInt       *cmap,*idx_p;
1247 
1248   PetscFunctionBegin;
1249   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1250   mat->getrowactive = PETSC_TRUE;
1251 
1252   if (!mat->rowvalues && (idx || v)) {
1253     /*
1254         allocate enough space to hold information from the longest row.
1255     */
1256     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1257     PetscInt     max = 1,tmp;
1258     for (i=0; i<matin->rmap.n; i++) {
1259       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1260       if (max < tmp) { max = tmp; }
1261     }
1262     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1263     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1264   }
1265 
1266   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1267   lrow = row - rstart;
1268 
1269   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1270   if (!v)   {pvA = 0; pvB = 0;}
1271   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1272   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1273   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1274   nztot = nzA + nzB;
1275 
1276   cmap  = mat->garray;
1277   if (v  || idx) {
1278     if (nztot) {
1279       /* Sort by increasing column numbers, assuming A and B already sorted */
1280       PetscInt imark = -1;
1281       if (v) {
1282         *v = v_p = mat->rowvalues;
1283         for (i=0; i<nzB; i++) {
1284           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1285           else break;
1286         }
1287         imark = i;
1288         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1289         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1290       }
1291       if (idx) {
1292         *idx = idx_p = mat->rowindices;
1293         if (imark > -1) {
1294           for (i=0; i<imark; i++) {
1295             idx_p[i] = cmap[cworkB[i]];
1296           }
1297         } else {
1298           for (i=0; i<nzB; i++) {
1299             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1300             else break;
1301           }
1302           imark = i;
1303         }
1304         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1305         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1306       }
1307     } else {
1308       if (idx) *idx = 0;
1309       if (v)   *v   = 0;
1310     }
1311   }
1312   *nz = nztot;
1313   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1314   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1320 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1321 {
1322   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1323 
1324   PetscFunctionBegin;
1325   if (!aij->getrowactive) {
1326     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1327   }
1328   aij->getrowactive = PETSC_FALSE;
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatNorm_MPIAIJ"
1334 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1335 {
1336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1337   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1338   PetscErrorCode ierr;
1339   PetscInt       i,j,cstart = mat->cmap.rstart;
1340   PetscReal      sum = 0.0;
1341   PetscScalar    *v;
1342 
1343   PetscFunctionBegin;
1344   if (aij->size == 1) {
1345     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1346   } else {
1347     if (type == NORM_FROBENIUS) {
1348       v = amat->a;
1349       for (i=0; i<amat->nz; i++) {
1350 #if defined(PETSC_USE_COMPLEX)
1351         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1352 #else
1353         sum += (*v)*(*v); v++;
1354 #endif
1355       }
1356       v = bmat->a;
1357       for (i=0; i<bmat->nz; i++) {
1358 #if defined(PETSC_USE_COMPLEX)
1359         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1360 #else
1361         sum += (*v)*(*v); v++;
1362 #endif
1363       }
1364       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1365       *norm = sqrt(*norm);
1366     } else if (type == NORM_1) { /* max column norm */
1367       PetscReal *tmp,*tmp2;
1368       PetscInt    *jj,*garray = aij->garray;
1369       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1370       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1371       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1372       *norm = 0.0;
1373       v = amat->a; jj = amat->j;
1374       for (j=0; j<amat->nz; j++) {
1375         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1376       }
1377       v = bmat->a; jj = bmat->j;
1378       for (j=0; j<bmat->nz; j++) {
1379         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1380       }
1381       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1382       for (j=0; j<mat->cmap.N; j++) {
1383         if (tmp2[j] > *norm) *norm = tmp2[j];
1384       }
1385       ierr = PetscFree(tmp);CHKERRQ(ierr);
1386       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1387     } else if (type == NORM_INFINITY) { /* max row norm */
1388       PetscReal ntemp = 0.0;
1389       for (j=0; j<aij->A->rmap.n; j++) {
1390         v = amat->a + amat->i[j];
1391         sum = 0.0;
1392         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1393           sum += PetscAbsScalar(*v); v++;
1394         }
1395         v = bmat->a + bmat->i[j];
1396         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1397           sum += PetscAbsScalar(*v); v++;
1398         }
1399         if (sum > ntemp) ntemp = sum;
1400       }
1401       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1402     } else {
1403       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1404     }
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatTranspose_MPIAIJ"
1411 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1412 {
1413   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1414   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1415   PetscErrorCode ierr;
1416   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1417   PetscInt       cstart=A->cmap.rstart,ncol;
1418   Mat            B;
1419   PetscScalar    *array;
1420 
1421   PetscFunctionBegin;
1422   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1423 
1424   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1425   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1426   ai = Aloc->i; aj = Aloc->j;
1427   bi = Bloc->i; bj = Bloc->j;
1428   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1429   cols = d_nnz + na + 1; /* work space to be used by B part */
1430   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1431   for (i=0; i<ai[ma]; i++){
1432     d_nnz[aj[i]] ++;
1433     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1434   }
1435 
1436   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1437   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1438   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1439   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1440 
1441   /* copy over the A part */
1442   array = Aloc->a;
1443   row = A->rmap.rstart;
1444   for (i=0; i<ma; i++) {
1445     ncol = ai[i+1]-ai[i];
1446     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1447     row++; array += ncol; aj += ncol;
1448   }
1449   aj = Aloc->j;
1450   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1451 
1452   /* copy over the B part */
1453   array = Bloc->a;
1454   row = A->rmap.rstart;
1455   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1456   for (i=0; i<mb; i++) {
1457     ncol = bi[i+1]-bi[i];
1458     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1459     row++; array += ncol; cols += ncol;
1460   }
1461   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1462   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1463   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1464   if (matout) {
1465     *matout = B;
1466   } else {
1467     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1468   }
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1474 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1475 {
1476   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1477   Mat            a = aij->A,b = aij->B;
1478   PetscErrorCode ierr;
1479   PetscInt       s1,s2,s3;
1480 
1481   PetscFunctionBegin;
1482   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1483   if (rr) {
1484     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1485     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1486     /* Overlap communication with computation. */
1487     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1488   }
1489   if (ll) {
1490     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1491     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1492     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1493   }
1494   /* scale  the diagonal block */
1495   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1496 
1497   if (rr) {
1498     /* Do a scatter end and then right scale the off-diagonal block */
1499     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1500     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1501   }
1502 
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1508 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1509 {
1510   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1511   PetscErrorCode ierr;
1512 
1513   PetscFunctionBegin;
1514   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1515   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1516   PetscFunctionReturn(0);
1517 }
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1520 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1521 {
1522   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1523   PetscErrorCode ierr;
1524 
1525   PetscFunctionBegin;
1526   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatEqual_MPIAIJ"
1532 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1533 {
1534   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1535   Mat            a,b,c,d;
1536   PetscTruth     flg;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   a = matA->A; b = matA->B;
1541   c = matB->A; d = matB->B;
1542 
1543   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1544   if (flg) {
1545     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1546   }
1547   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatCopy_MPIAIJ"
1553 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1554 {
1555   PetscErrorCode ierr;
1556   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1557   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1558 
1559   PetscFunctionBegin;
1560   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1561   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1562     /* because of the column compression in the off-processor part of the matrix a->B,
1563        the number of columns in a->B and b->B may be different, hence we cannot call
1564        the MatCopy() directly on the two parts. If need be, we can provide a more
1565        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1566        then copying the submatrices */
1567     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1568   } else {
1569     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1570     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1571   }
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1577 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1578 {
1579   PetscErrorCode ierr;
1580 
1581   PetscFunctionBegin;
1582   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #include "petscblaslapack.h"
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatAXPY_MPIAIJ"
1589 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1590 {
1591   PetscErrorCode ierr;
1592   PetscInt       i;
1593   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1594   PetscBLASInt   bnz,one=1;
1595   Mat_SeqAIJ     *x,*y;
1596 
1597   PetscFunctionBegin;
1598   if (str == SAME_NONZERO_PATTERN) {
1599     PetscScalar alpha = a;
1600     x = (Mat_SeqAIJ *)xx->A->data;
1601     y = (Mat_SeqAIJ *)yy->A->data;
1602     bnz = PetscBLASIntCast(x->nz);
1603     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1604     x = (Mat_SeqAIJ *)xx->B->data;
1605     y = (Mat_SeqAIJ *)yy->B->data;
1606     bnz = PetscBLASIntCast(x->nz);
1607     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1608   } else if (str == SUBSET_NONZERO_PATTERN) {
1609     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1610 
1611     x = (Mat_SeqAIJ *)xx->B->data;
1612     y = (Mat_SeqAIJ *)yy->B->data;
1613     if (y->xtoy && y->XtoY != xx->B) {
1614       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1615       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1616     }
1617     if (!y->xtoy) { /* get xtoy */
1618       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1619       y->XtoY = xx->B;
1620       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1621     }
1622     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1623   } else {
1624     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1625   }
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1630 
1631 #undef __FUNCT__
1632 #define __FUNCT__ "MatConjugate_MPIAIJ"
1633 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1634 {
1635 #if defined(PETSC_USE_COMPLEX)
1636   PetscErrorCode ierr;
1637   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1638 
1639   PetscFunctionBegin;
1640   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1641   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1642 #else
1643   PetscFunctionBegin;
1644 #endif
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 #undef __FUNCT__
1649 #define __FUNCT__ "MatRealPart_MPIAIJ"
1650 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1651 {
1652   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1653   PetscErrorCode ierr;
1654 
1655   PetscFunctionBegin;
1656   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1657   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1663 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1664 {
1665   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1666   PetscErrorCode ierr;
1667 
1668   PetscFunctionBegin;
1669   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1670   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #ifdef PETSC_HAVE_PBGL
1675 
1676 #include <boost/parallel/mpi/bsp_process_group.hpp>
1677 #include <boost/graph/distributed/ilu_default_graph.hpp>
1678 #include <boost/graph/distributed/ilu_0_block.hpp>
1679 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1680 #include <boost/graph/distributed/petsc/interface.hpp>
1681 #include <boost/multi_array.hpp>
1682 #include <boost/parallel/distributed_property_map.hpp>
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1686 /*
1687   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1688 */
1689 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1690 {
1691   namespace petsc = boost::distributed::petsc;
1692 
1693   namespace graph_dist = boost::graph::distributed;
1694   using boost::graph::distributed::ilu_default::process_group_type;
1695   using boost::graph::ilu_permuted;
1696 
1697   PetscTruth      row_identity, col_identity;
1698   PetscContainer  c;
1699   PetscInt        m, n, M, N;
1700   PetscErrorCode  ierr;
1701 
1702   PetscFunctionBegin;
1703   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1704   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1705   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1706   if (!row_identity || !col_identity) {
1707     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1708   }
1709 
1710   process_group_type pg;
1711   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1712   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1713   lgraph_type&   level_graph = *lgraph_p;
1714   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1715 
1716   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1717   ilu_permuted(level_graph);
1718 
1719   /* put together the new matrix */
1720   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1721   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1722   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1723   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1724   ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1725   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1726   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1727   (*fact)->factor = FACTOR_LU;
1728 
1729   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1730   ierr = PetscContainerSetPointer(c, lgraph_p);
1731   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1737 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1738 {
1739   PetscFunctionBegin;
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSolve_MPIAIJ"
1745 /*
1746   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1747 */
1748 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1749 {
1750   namespace graph_dist = boost::graph::distributed;
1751 
1752   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1753   lgraph_type*   lgraph_p;
1754   PetscContainer c;
1755   PetscErrorCode ierr;
1756 
1757   PetscFunctionBegin;
1758   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1759   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1760   ierr = VecCopy(b, x); CHKERRQ(ierr);
1761 
1762   PetscScalar* array_x;
1763   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1764   PetscInt sx;
1765   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1766 
1767   PetscScalar* array_b;
1768   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1769   PetscInt sb;
1770   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1771 
1772   lgraph_type&   level_graph = *lgraph_p;
1773   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1774 
1775   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1776   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1777                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1778 
1779   typedef boost::iterator_property_map<array_ref_type::iterator,
1780                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1781   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1782                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1783 
1784   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1785 
1786   PetscFunctionReturn(0);
1787 }
1788 #endif
1789 
1790 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1791   PetscInt       nzlocal,nsends,nrecvs;
1792   PetscMPIInt    *send_rank;
1793   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1794   PetscScalar    *sbuf_a,**rbuf_a;
1795   PetscErrorCode (*MatDestroy)(Mat);
1796 } Mat_Redundant;
1797 
1798 #undef __FUNCT__
1799 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1800 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1801 {
1802   PetscErrorCode       ierr;
1803   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1804   PetscInt             i;
1805 
1806   PetscFunctionBegin;
1807   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1808   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1809   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1810   for (i=0; i<redund->nrecvs; i++){
1811     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1812     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1813   }
1814   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1815   ierr = PetscFree(redund);CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatDestroy_MatRedundant"
1821 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1822 {
1823   PetscErrorCode  ierr;
1824   PetscContainer  container;
1825   Mat_Redundant   *redund=PETSC_NULL;
1826 
1827   PetscFunctionBegin;
1828   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1829   if (container) {
1830     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1831   } else {
1832     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1833   }
1834   A->ops->destroy = redund->MatDestroy;
1835   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1836   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1837   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1843 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1844 {
1845   PetscMPIInt    rank,size;
1846   MPI_Comm       comm=((PetscObject)mat)->comm;
1847   PetscErrorCode ierr;
1848   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1849   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1850   PetscInt       *rowrange=mat->rmap.range;
1851   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1852   Mat            A=aij->A,B=aij->B,C=*matredundant;
1853   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1854   PetscScalar    *sbuf_a;
1855   PetscInt       nzlocal=a->nz+b->nz;
1856   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1857   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1858   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1859   PetscScalar    *vals,*aworkA,*aworkB;
1860   PetscMPIInt    tag1,tag2,tag3,imdex;
1861   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1862                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1863   MPI_Status     recv_status,*send_status;
1864   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1865   PetscInt       **rbuf_j=PETSC_NULL;
1866   PetscScalar    **rbuf_a=PETSC_NULL;
1867   Mat_Redundant  *redund=PETSC_NULL;
1868   PetscContainer container;
1869 
1870   PetscFunctionBegin;
1871   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1872   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1873 
1874   if (reuse == MAT_REUSE_MATRIX) {
1875     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1876     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1877     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1878     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1879     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1880     if (container) {
1881       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1882     } else {
1883       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1884     }
1885     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1886 
1887     nsends    = redund->nsends;
1888     nrecvs    = redund->nrecvs;
1889     send_rank = redund->send_rank; recv_rank = send_rank + size;
1890     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1891     sbuf_j    = redund->sbuf_j;
1892     sbuf_a    = redund->sbuf_a;
1893     rbuf_j    = redund->rbuf_j;
1894     rbuf_a    = redund->rbuf_a;
1895   }
1896 
1897   if (reuse == MAT_INITIAL_MATRIX){
1898     PetscMPIInt  subrank,subsize;
1899     PetscInt     nleftover,np_subcomm;
1900     /* get the destination processors' id send_rank, nsends and nrecvs */
1901     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1902     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1903     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1904     recv_rank = send_rank + size;
1905     np_subcomm = size/nsubcomm;
1906     nleftover  = size - nsubcomm*np_subcomm;
1907     nsends = 0; nrecvs = 0;
1908     for (i=0; i<size; i++){ /* i=rank*/
1909       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1910         send_rank[nsends] = i; nsends++;
1911         recv_rank[nrecvs++] = i;
1912       }
1913     }
1914     if (rank >= size - nleftover){/* this proc is a leftover processor */
1915       i = size-nleftover-1;
1916       j = 0;
1917       while (j < nsubcomm - nleftover){
1918         send_rank[nsends++] = i;
1919         i--; j++;
1920       }
1921     }
1922 
1923     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1924       for (i=0; i<nleftover; i++){
1925         recv_rank[nrecvs++] = size-nleftover+i;
1926       }
1927     }
1928 
1929     /* allocate sbuf_j, sbuf_a */
1930     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1931     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1932     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1933   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1934 
1935   /* copy mat's local entries into the buffers */
1936   if (reuse == MAT_INITIAL_MATRIX){
1937     rownz_max = 0;
1938     rptr = sbuf_j;
1939     cols = sbuf_j + rend-rstart + 1;
1940     vals = sbuf_a;
1941     rptr[0] = 0;
1942     for (i=0; i<rend-rstart; i++){
1943       row = i + rstart;
1944       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1945       ncols  = nzA + nzB;
1946       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1947       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1948       /* load the column indices for this row into cols */
1949       lwrite = 0;
1950       for (l=0; l<nzB; l++) {
1951         if ((ctmp = bmap[cworkB[l]]) < cstart){
1952           vals[lwrite]   = aworkB[l];
1953           cols[lwrite++] = ctmp;
1954         }
1955       }
1956       for (l=0; l<nzA; l++){
1957         vals[lwrite]   = aworkA[l];
1958         cols[lwrite++] = cstart + cworkA[l];
1959       }
1960       for (l=0; l<nzB; l++) {
1961         if ((ctmp = bmap[cworkB[l]]) >= cend){
1962           vals[lwrite]   = aworkB[l];
1963           cols[lwrite++] = ctmp;
1964         }
1965       }
1966       vals += ncols;
1967       cols += ncols;
1968       rptr[i+1] = rptr[i] + ncols;
1969       if (rownz_max < ncols) rownz_max = ncols;
1970     }
1971     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
1972   } else { /* only copy matrix values into sbuf_a */
1973     rptr = sbuf_j;
1974     vals = sbuf_a;
1975     rptr[0] = 0;
1976     for (i=0; i<rend-rstart; i++){
1977       row = i + rstart;
1978       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1979       ncols  = nzA + nzB;
1980       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1981       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1982       lwrite = 0;
1983       for (l=0; l<nzB; l++) {
1984         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1985       }
1986       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1987       for (l=0; l<nzB; l++) {
1988         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1989       }
1990       vals += ncols;
1991       rptr[i+1] = rptr[i] + ncols;
1992     }
1993   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1994 
1995   /* send nzlocal to others, and recv other's nzlocal */
1996   /*--------------------------------------------------*/
1997   if (reuse == MAT_INITIAL_MATRIX){
1998     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
1999     s_waits2 = s_waits3 + nsends;
2000     s_waits1 = s_waits2 + nsends;
2001     r_waits1 = s_waits1 + nsends;
2002     r_waits2 = r_waits1 + nrecvs;
2003     r_waits3 = r_waits2 + nrecvs;
2004   } else {
2005     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2006     r_waits3 = s_waits3 + nsends;
2007   }
2008 
2009   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2010   if (reuse == MAT_INITIAL_MATRIX){
2011     /* get new tags to keep the communication clean */
2012     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2013     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2014     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2015     rbuf_nz = sbuf_nz + nsends;
2016 
2017     /* post receives of other's nzlocal */
2018     for (i=0; i<nrecvs; i++){
2019       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2020     }
2021     /* send nzlocal to others */
2022     for (i=0; i<nsends; i++){
2023       sbuf_nz[i] = nzlocal;
2024       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2025     }
2026     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2027     count = nrecvs;
2028     while (count) {
2029       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2030       recv_rank[imdex] = recv_status.MPI_SOURCE;
2031       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2032       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2033 
2034       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2035       rbuf_nz[imdex] += i + 2;
2036       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2037       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2038       count--;
2039     }
2040     /* wait on sends of nzlocal */
2041     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2042     /* send mat->i,j to others, and recv from other's */
2043     /*------------------------------------------------*/
2044     for (i=0; i<nsends; i++){
2045       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2046       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2047     }
2048     /* wait on receives of mat->i,j */
2049     /*------------------------------*/
2050     count = nrecvs;
2051     while (count) {
2052       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2053       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2054       count--;
2055     }
2056     /* wait on sends of mat->i,j */
2057     /*---------------------------*/
2058     if (nsends) {
2059       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2060     }
2061   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2062 
2063   /* post receives, send and receive mat->a */
2064   /*----------------------------------------*/
2065   for (imdex=0; imdex<nrecvs; imdex++) {
2066     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2067   }
2068   for (i=0; i<nsends; i++){
2069     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2070   }
2071   count = nrecvs;
2072   while (count) {
2073     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2074     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2075     count--;
2076   }
2077   if (nsends) {
2078     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2079   }
2080 
2081   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2082 
2083   /* create redundant matrix */
2084   /*-------------------------*/
2085   if (reuse == MAT_INITIAL_MATRIX){
2086     /* compute rownz_max for preallocation */
2087     for (imdex=0; imdex<nrecvs; imdex++){
2088       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2089       rptr = rbuf_j[imdex];
2090       for (i=0; i<j; i++){
2091         ncols = rptr[i+1] - rptr[i];
2092         if (rownz_max < ncols) rownz_max = ncols;
2093       }
2094     }
2095 
2096     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2097     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2098     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2099     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2100     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2101   } else {
2102     C = *matredundant;
2103   }
2104 
2105   /* insert local matrix entries */
2106   rptr = sbuf_j;
2107   cols = sbuf_j + rend-rstart + 1;
2108   vals = sbuf_a;
2109   for (i=0; i<rend-rstart; i++){
2110     row   = i + rstart;
2111     ncols = rptr[i+1] - rptr[i];
2112     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2113     vals += ncols;
2114     cols += ncols;
2115   }
2116   /* insert received matrix entries */
2117   for (imdex=0; imdex<nrecvs; imdex++){
2118     rstart = rowrange[recv_rank[imdex]];
2119     rend   = rowrange[recv_rank[imdex]+1];
2120     rptr = rbuf_j[imdex];
2121     cols = rbuf_j[imdex] + rend-rstart + 1;
2122     vals = rbuf_a[imdex];
2123     for (i=0; i<rend-rstart; i++){
2124       row   = i + rstart;
2125       ncols = rptr[i+1] - rptr[i];
2126       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2127       vals += ncols;
2128       cols += ncols;
2129     }
2130   }
2131   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2132   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2133   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2134   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2135   if (reuse == MAT_INITIAL_MATRIX){
2136     PetscContainer container;
2137     *matredundant = C;
2138     /* create a supporting struct and attach it to C for reuse */
2139     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2140     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2141     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2142     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2143     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2144 
2145     redund->nzlocal = nzlocal;
2146     redund->nsends  = nsends;
2147     redund->nrecvs  = nrecvs;
2148     redund->send_rank = send_rank;
2149     redund->sbuf_nz = sbuf_nz;
2150     redund->sbuf_j  = sbuf_j;
2151     redund->sbuf_a  = sbuf_a;
2152     redund->rbuf_j  = rbuf_j;
2153     redund->rbuf_a  = rbuf_a;
2154 
2155     redund->MatDestroy = C->ops->destroy;
2156     C->ops->destroy    = MatDestroy_MatRedundant;
2157   }
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2163 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2164 {
2165   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2166   PetscInt       n      = A->rmap.n;
2167   PetscInt       cstart = A->cmap.rstart;
2168   PetscInt      *cmap   = mat->garray;
2169   PetscInt      *diagIdx, *offdiagIdx;
2170   Vec            diagV, offdiagV;
2171   PetscScalar   *a, *diagA, *offdiagA;
2172   PetscInt       r;
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2177   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2178   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2179   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2180   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2181   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2182   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2183   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2184   for(r = 0; r < n; ++r) {
2185     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2186       a[r]   = diagA[r];
2187       idx[r] = cstart + diagIdx[r];
2188     } else {
2189       a[r]   = offdiagA[r];
2190       idx[r] = cmap[offdiagIdx[r]];
2191     }
2192   }
2193   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2194   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2195   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2196   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2197   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2198   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 #undef __FUNCT__
2203 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2204 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2205 {
2206   PetscErrorCode ierr;
2207 
2208   PetscFunctionBegin;
2209   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 /* -------------------------------------------------------------------*/
2214 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2215        MatGetRow_MPIAIJ,
2216        MatRestoreRow_MPIAIJ,
2217        MatMult_MPIAIJ,
2218 /* 4*/ MatMultAdd_MPIAIJ,
2219        MatMultTranspose_MPIAIJ,
2220        MatMultTransposeAdd_MPIAIJ,
2221 #ifdef PETSC_HAVE_PBGL
2222        MatSolve_MPIAIJ,
2223 #else
2224        0,
2225 #endif
2226        0,
2227        0,
2228 /*10*/ 0,
2229        0,
2230        0,
2231        MatRelax_MPIAIJ,
2232        MatTranspose_MPIAIJ,
2233 /*15*/ MatGetInfo_MPIAIJ,
2234        MatEqual_MPIAIJ,
2235        MatGetDiagonal_MPIAIJ,
2236        MatDiagonalScale_MPIAIJ,
2237        MatNorm_MPIAIJ,
2238 /*20*/ MatAssemblyBegin_MPIAIJ,
2239        MatAssemblyEnd_MPIAIJ,
2240        0,
2241        MatSetOption_MPIAIJ,
2242        MatZeroEntries_MPIAIJ,
2243 /*25*/ MatZeroRows_MPIAIJ,
2244        0,
2245 #ifdef PETSC_HAVE_PBGL
2246        MatLUFactorNumeric_MPIAIJ,
2247 #else
2248        0,
2249 #endif
2250        0,
2251        0,
2252 /*30*/ MatSetUpPreallocation_MPIAIJ,
2253 #ifdef PETSC_HAVE_PBGL
2254        MatILUFactorSymbolic_MPIAIJ,
2255 #else
2256        0,
2257 #endif
2258        0,
2259        0,
2260        0,
2261 /*35*/ MatDuplicate_MPIAIJ,
2262        0,
2263        0,
2264        0,
2265        0,
2266 /*40*/ MatAXPY_MPIAIJ,
2267        MatGetSubMatrices_MPIAIJ,
2268        MatIncreaseOverlap_MPIAIJ,
2269        MatGetValues_MPIAIJ,
2270        MatCopy_MPIAIJ,
2271 /*45*/ 0,
2272        MatScale_MPIAIJ,
2273        0,
2274        0,
2275        0,
2276 /*50*/ MatSetBlockSize_MPIAIJ,
2277        0,
2278        0,
2279        0,
2280        0,
2281 /*55*/ MatFDColoringCreate_MPIAIJ,
2282        0,
2283        MatSetUnfactored_MPIAIJ,
2284        MatPermute_MPIAIJ,
2285        0,
2286 /*60*/ MatGetSubMatrix_MPIAIJ,
2287        MatDestroy_MPIAIJ,
2288        MatView_MPIAIJ,
2289        0,
2290        0,
2291 /*65*/ 0,
2292        0,
2293        0,
2294        0,
2295        0,
2296 /*70*/ 0,
2297        0,
2298        MatSetColoring_MPIAIJ,
2299 #if defined(PETSC_HAVE_ADIC)
2300        MatSetValuesAdic_MPIAIJ,
2301 #else
2302        0,
2303 #endif
2304        MatSetValuesAdifor_MPIAIJ,
2305 /*75*/ 0,
2306        0,
2307        0,
2308        0,
2309        0,
2310 /*80*/ 0,
2311        0,
2312        0,
2313        0,
2314 /*84*/ MatLoad_MPIAIJ,
2315        0,
2316        0,
2317        0,
2318        0,
2319        0,
2320 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2321        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2322        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2323        MatPtAP_Basic,
2324        MatPtAPSymbolic_MPIAIJ,
2325 /*95*/ MatPtAPNumeric_MPIAIJ,
2326        0,
2327        0,
2328        0,
2329        0,
2330 /*100*/0,
2331        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2332        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2333        MatConjugate_MPIAIJ,
2334        0,
2335 /*105*/MatSetValuesRow_MPIAIJ,
2336        MatRealPart_MPIAIJ,
2337        MatImaginaryPart_MPIAIJ,
2338        0,
2339        0,
2340 /*110*/0,
2341        MatGetRedundantMatrix_MPIAIJ,
2342        MatGetRowMin_MPIAIJ,
2343        0,
2344        0,
2345 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2346 
2347 /* ----------------------------------------------------------------------------------------*/
2348 
2349 EXTERN_C_BEGIN
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2352 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2353 {
2354   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2355   PetscErrorCode ierr;
2356 
2357   PetscFunctionBegin;
2358   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2359   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2360   PetscFunctionReturn(0);
2361 }
2362 EXTERN_C_END
2363 
2364 EXTERN_C_BEGIN
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2367 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2368 {
2369   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2370   PetscErrorCode ierr;
2371 
2372   PetscFunctionBegin;
2373   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2374   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2375   PetscFunctionReturn(0);
2376 }
2377 EXTERN_C_END
2378 
2379 #include "petscpc.h"
2380 EXTERN_C_BEGIN
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2383 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2384 {
2385   Mat_MPIAIJ     *b;
2386   PetscErrorCode ierr;
2387   PetscInt       i;
2388 
2389   PetscFunctionBegin;
2390   B->preallocated = PETSC_TRUE;
2391   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2392   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2393   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2394   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2395 
2396   B->rmap.bs = B->cmap.bs = 1;
2397   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2398   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2399   if (d_nnz) {
2400     for (i=0; i<B->rmap.n; i++) {
2401       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2402     }
2403   }
2404   if (o_nnz) {
2405     for (i=0; i<B->rmap.n; i++) {
2406       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2407     }
2408   }
2409   b = (Mat_MPIAIJ*)B->data;
2410 
2411   /* Explicitly create 2 MATSEQAIJ matrices. */
2412   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2413   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2414   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2415   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2416   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2417   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2418   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2419   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2420 
2421   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2422   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2423 
2424   PetscFunctionReturn(0);
2425 }
2426 EXTERN_C_END
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2430 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2431 {
2432   Mat            mat;
2433   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2434   PetscErrorCode ierr;
2435 
2436   PetscFunctionBegin;
2437   *newmat       = 0;
2438   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2439   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2440   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2441   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2442   a    = (Mat_MPIAIJ*)mat->data;
2443 
2444   mat->factor       = matin->factor;
2445   mat->rmap.bs      = matin->rmap.bs;
2446   mat->assembled    = PETSC_TRUE;
2447   mat->insertmode   = NOT_SET_VALUES;
2448   mat->preallocated = PETSC_TRUE;
2449 
2450   a->size           = oldmat->size;
2451   a->rank           = oldmat->rank;
2452   a->donotstash     = oldmat->donotstash;
2453   a->roworiented    = oldmat->roworiented;
2454   a->rowindices     = 0;
2455   a->rowvalues      = 0;
2456   a->getrowactive   = PETSC_FALSE;
2457 
2458   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2459   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2460 
2461   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2462   if (oldmat->colmap) {
2463 #if defined (PETSC_USE_CTABLE)
2464     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2465 #else
2466     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2467     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2468     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2469 #endif
2470   } else a->colmap = 0;
2471   if (oldmat->garray) {
2472     PetscInt len;
2473     len  = oldmat->B->cmap.n;
2474     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2475     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2476     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2477   } else a->garray = 0;
2478 
2479   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2480   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2481   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2482   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2483   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2484   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2485   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2486   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2487   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2488   *newmat = mat;
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #include "petscsys.h"
2493 
2494 #undef __FUNCT__
2495 #define __FUNCT__ "MatLoad_MPIAIJ"
2496 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2497 {
2498   Mat            A;
2499   PetscScalar    *vals,*svals;
2500   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2501   MPI_Status     status;
2502   PetscErrorCode ierr;
2503   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2504   PetscInt       i,nz,j,rstart,rend,mmax;
2505   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2506   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2507   PetscInt       cend,cstart,n,*rowners;
2508   int            fd;
2509 
2510   PetscFunctionBegin;
2511   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2512   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2513   if (!rank) {
2514     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2515     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2516     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2517   }
2518 
2519   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2520   M = header[1]; N = header[2];
2521   /* determine ownership of all rows */
2522   m    = M/size + ((M % size) > rank);
2523   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2524   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2525 
2526   /* First process needs enough room for process with most rows */
2527   if (!rank) {
2528     mmax       = rowners[1];
2529     for (i=2; i<size; i++) {
2530       mmax = PetscMax(mmax,rowners[i]);
2531     }
2532   } else mmax = m;
2533 
2534   rowners[0] = 0;
2535   for (i=2; i<=size; i++) {
2536     rowners[i] += rowners[i-1];
2537   }
2538   rstart = rowners[rank];
2539   rend   = rowners[rank+1];
2540 
2541   /* distribute row lengths to all processors */
2542   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2543   if (!rank) {
2544     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2545     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2546     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2547     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2548     for (j=0; j<m; j++) {
2549       procsnz[0] += ourlens[j];
2550     }
2551     for (i=1; i<size; i++) {
2552       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2553       /* calculate the number of nonzeros on each processor */
2554       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2555         procsnz[i] += rowlengths[j];
2556       }
2557       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2558     }
2559     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2560   } else {
2561     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2562   }
2563 
2564   if (!rank) {
2565     /* determine max buffer needed and allocate it */
2566     maxnz = 0;
2567     for (i=0; i<size; i++) {
2568       maxnz = PetscMax(maxnz,procsnz[i]);
2569     }
2570     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2571 
2572     /* read in my part of the matrix column indices  */
2573     nz   = procsnz[0];
2574     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2575     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2576 
2577     /* read in every one elses and ship off */
2578     for (i=1; i<size; i++) {
2579       nz   = procsnz[i];
2580       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2581       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2582     }
2583     ierr = PetscFree(cols);CHKERRQ(ierr);
2584   } else {
2585     /* determine buffer space needed for message */
2586     nz = 0;
2587     for (i=0; i<m; i++) {
2588       nz += ourlens[i];
2589     }
2590     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2591 
2592     /* receive message of column indices*/
2593     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2594     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2595     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2596   }
2597 
2598   /* determine column ownership if matrix is not square */
2599   if (N != M) {
2600     n      = N/size + ((N % size) > rank);
2601     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2602     cstart = cend - n;
2603   } else {
2604     cstart = rstart;
2605     cend   = rend;
2606     n      = cend - cstart;
2607   }
2608 
2609   /* loop over local rows, determining number of off diagonal entries */
2610   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2611   jj = 0;
2612   for (i=0; i<m; i++) {
2613     for (j=0; j<ourlens[i]; j++) {
2614       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2615       jj++;
2616     }
2617   }
2618 
2619   /* create our matrix */
2620   for (i=0; i<m; i++) {
2621     ourlens[i] -= offlens[i];
2622   }
2623   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2624   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2625   ierr = MatSetType(A,type);CHKERRQ(ierr);
2626   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2627 
2628   for (i=0; i<m; i++) {
2629     ourlens[i] += offlens[i];
2630   }
2631 
2632   if (!rank) {
2633     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2634 
2635     /* read in my part of the matrix numerical values  */
2636     nz   = procsnz[0];
2637     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2638 
2639     /* insert into matrix */
2640     jj      = rstart;
2641     smycols = mycols;
2642     svals   = vals;
2643     for (i=0; i<m; i++) {
2644       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2645       smycols += ourlens[i];
2646       svals   += ourlens[i];
2647       jj++;
2648     }
2649 
2650     /* read in other processors and ship out */
2651     for (i=1; i<size; i++) {
2652       nz   = procsnz[i];
2653       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2654       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2655     }
2656     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2657   } else {
2658     /* receive numeric values */
2659     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2660 
2661     /* receive message of values*/
2662     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2663     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2664     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2665 
2666     /* insert into matrix */
2667     jj      = rstart;
2668     smycols = mycols;
2669     svals   = vals;
2670     for (i=0; i<m; i++) {
2671       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2672       smycols += ourlens[i];
2673       svals   += ourlens[i];
2674       jj++;
2675     }
2676   }
2677   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2678   ierr = PetscFree(vals);CHKERRQ(ierr);
2679   ierr = PetscFree(mycols);CHKERRQ(ierr);
2680   ierr = PetscFree(rowners);CHKERRQ(ierr);
2681 
2682   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2683   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2684   *newmat = A;
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 #undef __FUNCT__
2689 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2690 /*
2691     Not great since it makes two copies of the submatrix, first an SeqAIJ
2692   in local and then by concatenating the local matrices the end result.
2693   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2694 */
2695 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2696 {
2697   PetscErrorCode ierr;
2698   PetscMPIInt    rank,size;
2699   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2700   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2701   Mat            *local,M,Mreuse;
2702   PetscScalar    *vwork,*aa;
2703   MPI_Comm       comm = ((PetscObject)mat)->comm;
2704   Mat_SeqAIJ     *aij;
2705 
2706 
2707   PetscFunctionBegin;
2708   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2709   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2710 
2711   if (call ==  MAT_REUSE_MATRIX) {
2712     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2713     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2714     local = &Mreuse;
2715     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2716   } else {
2717     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2718     Mreuse = *local;
2719     ierr   = PetscFree(local);CHKERRQ(ierr);
2720   }
2721 
2722   /*
2723       m - number of local rows
2724       n - number of columns (same on all processors)
2725       rstart - first row in new global matrix generated
2726   */
2727   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2728   if (call == MAT_INITIAL_MATRIX) {
2729     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2730     ii  = aij->i;
2731     jj  = aij->j;
2732 
2733     /*
2734         Determine the number of non-zeros in the diagonal and off-diagonal
2735         portions of the matrix in order to do correct preallocation
2736     */
2737 
2738     /* first get start and end of "diagonal" columns */
2739     if (csize == PETSC_DECIDE) {
2740       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2741       if (mglobal == n) { /* square matrix */
2742 	nlocal = m;
2743       } else {
2744         nlocal = n/size + ((n % size) > rank);
2745       }
2746     } else {
2747       nlocal = csize;
2748     }
2749     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2750     rstart = rend - nlocal;
2751     if (rank == size - 1 && rend != n) {
2752       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2753     }
2754 
2755     /* next, compute all the lengths */
2756     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2757     olens = dlens + m;
2758     for (i=0; i<m; i++) {
2759       jend = ii[i+1] - ii[i];
2760       olen = 0;
2761       dlen = 0;
2762       for (j=0; j<jend; j++) {
2763         if (*jj < rstart || *jj >= rend) olen++;
2764         else dlen++;
2765         jj++;
2766       }
2767       olens[i] = olen;
2768       dlens[i] = dlen;
2769     }
2770     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2771     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2772     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2773     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2774     ierr = PetscFree(dlens);CHKERRQ(ierr);
2775   } else {
2776     PetscInt ml,nl;
2777 
2778     M = *newmat;
2779     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2780     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2781     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2782     /*
2783          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2784        rather than the slower MatSetValues().
2785     */
2786     M->was_assembled = PETSC_TRUE;
2787     M->assembled     = PETSC_FALSE;
2788   }
2789   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2790   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2791   ii  = aij->i;
2792   jj  = aij->j;
2793   aa  = aij->a;
2794   for (i=0; i<m; i++) {
2795     row   = rstart + i;
2796     nz    = ii[i+1] - ii[i];
2797     cwork = jj;     jj += nz;
2798     vwork = aa;     aa += nz;
2799     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2800   }
2801 
2802   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2803   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2804   *newmat = M;
2805 
2806   /* save submatrix used in processor for next request */
2807   if (call ==  MAT_INITIAL_MATRIX) {
2808     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2809     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2810   }
2811 
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 EXTERN_C_BEGIN
2816 #undef __FUNCT__
2817 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2818 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2819 {
2820   PetscInt       m,cstart, cend,j,nnz,i,d;
2821   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2822   const PetscInt *JJ;
2823   PetscScalar    *values;
2824   PetscErrorCode ierr;
2825 
2826   PetscFunctionBegin;
2827   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2828 
2829   B->rmap.bs = B->cmap.bs = 1;
2830   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2831   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2832   m      = B->rmap.n;
2833   cstart = B->cmap.rstart;
2834   cend   = B->cmap.rend;
2835   rstart = B->rmap.rstart;
2836 
2837   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2838   o_nnz = d_nnz + m;
2839 
2840 #if defined(PETSC_USE_DEBUGGING)
2841   for (i=0; i<m; i++) {
2842     nnz     = Ii[i+1]- Ii[i];
2843     JJ      = J + Ii[i];
2844     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2845     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
2846     if (nnz && (JJ[nnz-1] >= B->cmap.N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
2847     for (j=1; j<nnz; j++) {
2848       if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
2849     }
2850   }
2851 #endif
2852 
2853   for (i=0; i<m; i++) {
2854     nnz     = Ii[i+1]- Ii[i];
2855     JJ      = J + Ii[i];
2856     nnz_max = PetscMax(nnz_max,nnz);
2857     for (j=0; j<nnz; j++) {
2858       if (*JJ >= cstart) break;
2859       JJ++;
2860     }
2861     d = 0;
2862     for (; j<nnz; j++) {
2863       if (*JJ++ >= cend) break;
2864       d++;
2865     }
2866     d_nnz[i] = d;
2867     o_nnz[i] = nnz - d;
2868   }
2869   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2870   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2871 
2872   if (v) values = (PetscScalar*)v;
2873   else {
2874     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2875     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2876   }
2877 
2878   for (i=0; i<m; i++) {
2879     ii   = i + rstart;
2880     nnz  = Ii[i+1]- Ii[i];
2881     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2882   }
2883   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2884   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2885 
2886   if (!v) {
2887     ierr = PetscFree(values);CHKERRQ(ierr);
2888   }
2889   PetscFunctionReturn(0);
2890 }
2891 EXTERN_C_END
2892 
2893 #undef __FUNCT__
2894 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2895 /*@
2896    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2897    (the default parallel PETSc format).
2898 
2899    Collective on MPI_Comm
2900 
2901    Input Parameters:
2902 +  B - the matrix
2903 .  i - the indices into j for the start of each local row (starts with zero)
2904 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2905 -  v - optional values in the matrix
2906 
2907    Level: developer
2908 
2909    Notes:
2910        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2911      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2912      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2913 
2914        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2915 
2916        The format which is used for the sparse matrix input, is equivalent to a
2917     row-major ordering.. i.e for the following matrix, the input data expected is
2918     as shown:
2919 
2920         1 0 0
2921         2 0 3     P0
2922        -------
2923         4 5 6     P1
2924 
2925      Process0 [P0]: rows_owned=[0,1]
2926         i =  {0,1,3}  [size = nrow+1  = 2+1]
2927         j =  {0,0,2}  [size = nz = 6]
2928         v =  {1,2,3}  [size = nz = 6]
2929 
2930      Process1 [P1]: rows_owned=[2]
2931         i =  {0,3}    [size = nrow+1  = 1+1]
2932         j =  {0,1,2}  [size = nz = 6]
2933         v =  {4,5,6}  [size = nz = 6]
2934 
2935       The column indices for each row MUST be sorted.
2936 
2937 .keywords: matrix, aij, compressed row, sparse, parallel
2938 
2939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2940           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2941 @*/
2942 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2943 {
2944   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2945 
2946   PetscFunctionBegin;
2947   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2948   if (f) {
2949     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2950   }
2951   PetscFunctionReturn(0);
2952 }
2953 
2954 #undef __FUNCT__
2955 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2956 /*@C
2957    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2958    (the default parallel PETSc format).  For good matrix assembly performance
2959    the user should preallocate the matrix storage by setting the parameters
2960    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2961    performance can be increased by more than a factor of 50.
2962 
2963    Collective on MPI_Comm
2964 
2965    Input Parameters:
2966 +  A - the matrix
2967 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2968            (same value is used for all local rows)
2969 .  d_nnz - array containing the number of nonzeros in the various rows of the
2970            DIAGONAL portion of the local submatrix (possibly different for each row)
2971            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2972            The size of this array is equal to the number of local rows, i.e 'm'.
2973            You must leave room for the diagonal entry even if it is zero.
2974 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2975            submatrix (same value is used for all local rows).
2976 -  o_nnz - array containing the number of nonzeros in the various rows of the
2977            OFF-DIAGONAL portion of the local submatrix (possibly different for
2978            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2979            structure. The size of this array is equal to the number
2980            of local rows, i.e 'm'.
2981 
2982    If the *_nnz parameter is given then the *_nz parameter is ignored
2983 
2984    The AIJ format (also called the Yale sparse matrix format or
2985    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2986    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2987 
2988    The parallel matrix is partitioned such that the first m0 rows belong to
2989    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2990    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2991 
2992    The DIAGONAL portion of the local submatrix of a processor can be defined
2993    as the submatrix which is obtained by extraction the part corresponding
2994    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2995    first row that belongs to the processor, and r2 is the last row belonging
2996    to the this processor. This is a square mxm matrix. The remaining portion
2997    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2998 
2999    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3000 
3001    You can call MatGetInfo() to get information on how effective the preallocation was;
3002    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3003    You can also run with the option -info and look for messages with the string
3004    malloc in them to see if additional memory allocation was needed.
3005 
3006    Example usage:
3007 
3008    Consider the following 8x8 matrix with 34 non-zero values, that is
3009    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3010    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3011    as follows:
3012 
3013 .vb
3014             1  2  0  |  0  3  0  |  0  4
3015     Proc0   0  5  6  |  7  0  0  |  8  0
3016             9  0 10  | 11  0  0  | 12  0
3017     -------------------------------------
3018            13  0 14  | 15 16 17  |  0  0
3019     Proc1   0 18  0  | 19 20 21  |  0  0
3020             0  0  0  | 22 23  0  | 24  0
3021     -------------------------------------
3022     Proc2  25 26 27  |  0  0 28  | 29  0
3023            30  0  0  | 31 32 33  |  0 34
3024 .ve
3025 
3026    This can be represented as a collection of submatrices as:
3027 
3028 .vb
3029       A B C
3030       D E F
3031       G H I
3032 .ve
3033 
3034    Where the submatrices A,B,C are owned by proc0, D,E,F are
3035    owned by proc1, G,H,I are owned by proc2.
3036 
3037    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3038    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3039    The 'M','N' parameters are 8,8, and have the same values on all procs.
3040 
3041    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3042    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3043    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3044    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3045    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3046    matrix, ans [DF] as another SeqAIJ matrix.
3047 
3048    When d_nz, o_nz parameters are specified, d_nz storage elements are
3049    allocated for every row of the local diagonal submatrix, and o_nz
3050    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3051    One way to choose d_nz and o_nz is to use the max nonzerors per local
3052    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3053    In this case, the values of d_nz,o_nz are:
3054 .vb
3055      proc0 : dnz = 2, o_nz = 2
3056      proc1 : dnz = 3, o_nz = 2
3057      proc2 : dnz = 1, o_nz = 4
3058 .ve
3059    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3060    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3061    for proc3. i.e we are using 12+15+10=37 storage locations to store
3062    34 values.
3063 
3064    When d_nnz, o_nnz parameters are specified, the storage is specified
3065    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3066    In the above case the values for d_nnz,o_nnz are:
3067 .vb
3068      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3069      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3070      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3071 .ve
3072    Here the space allocated is sum of all the above values i.e 34, and
3073    hence pre-allocation is perfect.
3074 
3075    Level: intermediate
3076 
3077 .keywords: matrix, aij, compressed row, sparse, parallel
3078 
3079 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3080           MPIAIJ, MatGetInfo()
3081 @*/
3082 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3083 {
3084   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3085 
3086   PetscFunctionBegin;
3087   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3088   if (f) {
3089     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3090   }
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 #undef __FUNCT__
3095 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3096 /*@
3097      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3098          CSR format the local rows.
3099 
3100    Collective on MPI_Comm
3101 
3102    Input Parameters:
3103 +  comm - MPI communicator
3104 .  m - number of local rows (Cannot be PETSC_DECIDE)
3105 .  n - This value should be the same as the local size used in creating the
3106        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3107        calculated if N is given) For square matrices n is almost always m.
3108 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3109 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3110 .   i - row indices
3111 .   j - column indices
3112 -   a - matrix values
3113 
3114    Output Parameter:
3115 .   mat - the matrix
3116 
3117    Level: intermediate
3118 
3119    Notes:
3120        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3121      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3122      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3123 
3124        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3125 
3126        The format which is used for the sparse matrix input, is equivalent to a
3127     row-major ordering.. i.e for the following matrix, the input data expected is
3128     as shown:
3129 
3130         1 0 0
3131         2 0 3     P0
3132        -------
3133         4 5 6     P1
3134 
3135      Process0 [P0]: rows_owned=[0,1]
3136         i =  {0,1,3}  [size = nrow+1  = 2+1]
3137         j =  {0,0,2}  [size = nz = 6]
3138         v =  {1,2,3}  [size = nz = 6]
3139 
3140      Process1 [P1]: rows_owned=[2]
3141         i =  {0,3}    [size = nrow+1  = 1+1]
3142         j =  {0,1,2}  [size = nz = 6]
3143         v =  {4,5,6}  [size = nz = 6]
3144 
3145 .keywords: matrix, aij, compressed row, sparse, parallel
3146 
3147 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3148           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3149 @*/
3150 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3151 {
3152   PetscErrorCode ierr;
3153 
3154  PetscFunctionBegin;
3155   if (i[0]) {
3156     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3157   }
3158   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3159   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3160   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3161   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3162   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 #undef __FUNCT__
3167 #define __FUNCT__ "MatCreateMPIAIJ"
3168 /*@C
3169    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3170    (the default parallel PETSc format).  For good matrix assembly performance
3171    the user should preallocate the matrix storage by setting the parameters
3172    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3173    performance can be increased by more than a factor of 50.
3174 
3175    Collective on MPI_Comm
3176 
3177    Input Parameters:
3178 +  comm - MPI communicator
3179 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3180            This value should be the same as the local size used in creating the
3181            y vector for the matrix-vector product y = Ax.
3182 .  n - This value should be the same as the local size used in creating the
3183        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3184        calculated if N is given) For square matrices n is almost always m.
3185 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3186 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3187 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3188            (same value is used for all local rows)
3189 .  d_nnz - array containing the number of nonzeros in the various rows of the
3190            DIAGONAL portion of the local submatrix (possibly different for each row)
3191            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3192            The size of this array is equal to the number of local rows, i.e 'm'.
3193            You must leave room for the diagonal entry even if it is zero.
3194 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3195            submatrix (same value is used for all local rows).
3196 -  o_nnz - array containing the number of nonzeros in the various rows of the
3197            OFF-DIAGONAL portion of the local submatrix (possibly different for
3198            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3199            structure. The size of this array is equal to the number
3200            of local rows, i.e 'm'.
3201 
3202    Output Parameter:
3203 .  A - the matrix
3204 
3205    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3206    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3207    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3208    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3209 
3210    Notes:
3211    If the *_nnz parameter is given then the *_nz parameter is ignored
3212 
3213    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3214    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3215    storage requirements for this matrix.
3216 
3217    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3218    processor than it must be used on all processors that share the object for
3219    that argument.
3220 
3221    The user MUST specify either the local or global matrix dimensions
3222    (possibly both).
3223 
3224    The parallel matrix is partitioned across processors such that the
3225    first m0 rows belong to process 0, the next m1 rows belong to
3226    process 1, the next m2 rows belong to process 2 etc.. where
3227    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3228    values corresponding to [m x N] submatrix.
3229 
3230    The columns are logically partitioned with the n0 columns belonging
3231    to 0th partition, the next n1 columns belonging to the next
3232    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3233 
3234    The DIAGONAL portion of the local submatrix on any given processor
3235    is the submatrix corresponding to the rows and columns m,n
3236    corresponding to the given processor. i.e diagonal matrix on
3237    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3238    etc. The remaining portion of the local submatrix [m x (N-n)]
3239    constitute the OFF-DIAGONAL portion. The example below better
3240    illustrates this concept.
3241 
3242    For a square global matrix we define each processor's diagonal portion
3243    to be its local rows and the corresponding columns (a square submatrix);
3244    each processor's off-diagonal portion encompasses the remainder of the
3245    local matrix (a rectangular submatrix).
3246 
3247    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3248 
3249    When calling this routine with a single process communicator, a matrix of
3250    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3251    type of communicator, use the construction mechanism:
3252      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3253 
3254    By default, this format uses inodes (identical nodes) when possible.
3255    We search for consecutive rows with the same nonzero structure, thereby
3256    reusing matrix information to achieve increased efficiency.
3257 
3258    Options Database Keys:
3259 +  -mat_no_inode  - Do not use inodes
3260 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3261 -  -mat_aij_oneindex - Internally use indexing starting at 1
3262         rather than 0.  Note that when calling MatSetValues(),
3263         the user still MUST index entries starting at 0!
3264 
3265 
3266    Example usage:
3267 
3268    Consider the following 8x8 matrix with 34 non-zero values, that is
3269    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3270    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3271    as follows:
3272 
3273 .vb
3274             1  2  0  |  0  3  0  |  0  4
3275     Proc0   0  5  6  |  7  0  0  |  8  0
3276             9  0 10  | 11  0  0  | 12  0
3277     -------------------------------------
3278            13  0 14  | 15 16 17  |  0  0
3279     Proc1   0 18  0  | 19 20 21  |  0  0
3280             0  0  0  | 22 23  0  | 24  0
3281     -------------------------------------
3282     Proc2  25 26 27  |  0  0 28  | 29  0
3283            30  0  0  | 31 32 33  |  0 34
3284 .ve
3285 
3286    This can be represented as a collection of submatrices as:
3287 
3288 .vb
3289       A B C
3290       D E F
3291       G H I
3292 .ve
3293 
3294    Where the submatrices A,B,C are owned by proc0, D,E,F are
3295    owned by proc1, G,H,I are owned by proc2.
3296 
3297    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3298    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3299    The 'M','N' parameters are 8,8, and have the same values on all procs.
3300 
3301    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3302    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3303    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3304    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3305    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3306    matrix, ans [DF] as another SeqAIJ matrix.
3307 
3308    When d_nz, o_nz parameters are specified, d_nz storage elements are
3309    allocated for every row of the local diagonal submatrix, and o_nz
3310    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3311    One way to choose d_nz and o_nz is to use the max nonzerors per local
3312    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3313    In this case, the values of d_nz,o_nz are:
3314 .vb
3315      proc0 : dnz = 2, o_nz = 2
3316      proc1 : dnz = 3, o_nz = 2
3317      proc2 : dnz = 1, o_nz = 4
3318 .ve
3319    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3320    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3321    for proc3. i.e we are using 12+15+10=37 storage locations to store
3322    34 values.
3323 
3324    When d_nnz, o_nnz parameters are specified, the storage is specified
3325    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3326    In the above case the values for d_nnz,o_nnz are:
3327 .vb
3328      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3329      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3330      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3331 .ve
3332    Here the space allocated is sum of all the above values i.e 34, and
3333    hence pre-allocation is perfect.
3334 
3335    Level: intermediate
3336 
3337 .keywords: matrix, aij, compressed row, sparse, parallel
3338 
3339 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3340           MPIAIJ, MatCreateMPIAIJWithArrays()
3341 @*/
3342 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3343 {
3344   PetscErrorCode ierr;
3345   PetscMPIInt    size;
3346 
3347   PetscFunctionBegin;
3348   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3349   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3350   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3351   if (size > 1) {
3352     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3353     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3354   } else {
3355     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3356     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3357   }
3358   PetscFunctionReturn(0);
3359 }
3360 
3361 #undef __FUNCT__
3362 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3363 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3364 {
3365   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3366 
3367   PetscFunctionBegin;
3368   *Ad     = a->A;
3369   *Ao     = a->B;
3370   *colmap = a->garray;
3371   PetscFunctionReturn(0);
3372 }
3373 
3374 #undef __FUNCT__
3375 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3376 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3377 {
3378   PetscErrorCode ierr;
3379   PetscInt       i;
3380   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3381 
3382   PetscFunctionBegin;
3383   if (coloring->ctype == IS_COLORING_GLOBAL) {
3384     ISColoringValue *allcolors,*colors;
3385     ISColoring      ocoloring;
3386 
3387     /* set coloring for diagonal portion */
3388     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3389 
3390     /* set coloring for off-diagonal portion */
3391     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3392     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3393     for (i=0; i<a->B->cmap.n; i++) {
3394       colors[i] = allcolors[a->garray[i]];
3395     }
3396     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3397     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3398     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3399     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3400   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3401     ISColoringValue *colors;
3402     PetscInt        *larray;
3403     ISColoring      ocoloring;
3404 
3405     /* set coloring for diagonal portion */
3406     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3407     for (i=0; i<a->A->cmap.n; i++) {
3408       larray[i] = i + A->cmap.rstart;
3409     }
3410     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3411     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3412     for (i=0; i<a->A->cmap.n; i++) {
3413       colors[i] = coloring->colors[larray[i]];
3414     }
3415     ierr = PetscFree(larray);CHKERRQ(ierr);
3416     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3417     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3418     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3419 
3420     /* set coloring for off-diagonal portion */
3421     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3422     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3423     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3424     for (i=0; i<a->B->cmap.n; i++) {
3425       colors[i] = coloring->colors[larray[i]];
3426     }
3427     ierr = PetscFree(larray);CHKERRQ(ierr);
3428     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3429     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3430     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3431   } else {
3432     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3433   }
3434 
3435   PetscFunctionReturn(0);
3436 }
3437 
3438 #if defined(PETSC_HAVE_ADIC)
3439 #undef __FUNCT__
3440 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3441 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3442 {
3443   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3444   PetscErrorCode ierr;
3445 
3446   PetscFunctionBegin;
3447   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3448   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3449   PetscFunctionReturn(0);
3450 }
3451 #endif
3452 
3453 #undef __FUNCT__
3454 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3455 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3456 {
3457   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3458   PetscErrorCode ierr;
3459 
3460   PetscFunctionBegin;
3461   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3462   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3463   PetscFunctionReturn(0);
3464 }
3465 
3466 #undef __FUNCT__
3467 #define __FUNCT__ "MatMerge"
3468 /*@
3469       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3470                  matrices from each processor
3471 
3472     Collective on MPI_Comm
3473 
3474    Input Parameters:
3475 +    comm - the communicators the parallel matrix will live on
3476 .    inmat - the input sequential matrices
3477 .    n - number of local columns (or PETSC_DECIDE)
3478 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3479 
3480    Output Parameter:
3481 .    outmat - the parallel matrix generated
3482 
3483     Level: advanced
3484 
3485    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3486 
3487 @*/
3488 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3489 {
3490   PetscErrorCode ierr;
3491   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3492   PetscInt       *indx;
3493   PetscScalar    *values;
3494 
3495   PetscFunctionBegin;
3496   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3497   if (scall == MAT_INITIAL_MATRIX){
3498     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3499     if (n == PETSC_DECIDE){
3500       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3501     }
3502     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3503     rstart -= m;
3504 
3505     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3506     for (i=0;i<m;i++) {
3507       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3508       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3509       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3510     }
3511     /* This routine will ONLY return MPIAIJ type matrix */
3512     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3513     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3514     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3515     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3516     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3517 
3518   } else if (scall == MAT_REUSE_MATRIX){
3519     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3520   } else {
3521     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3522   }
3523 
3524   for (i=0;i<m;i++) {
3525     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3526     Ii    = i + rstart;
3527     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3528     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3529   }
3530   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3531   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3532   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533 
3534   PetscFunctionReturn(0);
3535 }
3536 
3537 #undef __FUNCT__
3538 #define __FUNCT__ "MatFileSplit"
3539 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3540 {
3541   PetscErrorCode    ierr;
3542   PetscMPIInt       rank;
3543   PetscInt          m,N,i,rstart,nnz;
3544   size_t            len;
3545   const PetscInt    *indx;
3546   PetscViewer       out;
3547   char              *name;
3548   Mat               B;
3549   const PetscScalar *values;
3550 
3551   PetscFunctionBegin;
3552   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3553   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3554   /* Should this be the type of the diagonal block of A? */
3555   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3556   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3557   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3558   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3559   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3560   for (i=0;i<m;i++) {
3561     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3562     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3563     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3564   }
3565   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3566   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3567 
3568   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3569   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3570   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3571   sprintf(name,"%s.%d",outfile,rank);
3572   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3573   ierr = PetscFree(name);
3574   ierr = MatView(B,out);CHKERRQ(ierr);
3575   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3576   ierr = MatDestroy(B);CHKERRQ(ierr);
3577   PetscFunctionReturn(0);
3578 }
3579 
3580 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3581 #undef __FUNCT__
3582 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3583 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3584 {
3585   PetscErrorCode       ierr;
3586   Mat_Merge_SeqsToMPI  *merge;
3587   PetscContainer       container;
3588 
3589   PetscFunctionBegin;
3590   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3591   if (container) {
3592     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3593     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3594     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3595     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3596     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3597     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3598     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3599     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3600     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3601     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3602     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3603     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3604 
3605     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3606     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3607   }
3608   ierr = PetscFree(merge);CHKERRQ(ierr);
3609 
3610   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3611   PetscFunctionReturn(0);
3612 }
3613 
3614 #include "src/mat/utils/freespace.h"
3615 #include "petscbt.h"
3616 
3617 #undef __FUNCT__
3618 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3619 /*@C
3620       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3621                  matrices from each processor
3622 
3623     Collective on MPI_Comm
3624 
3625    Input Parameters:
3626 +    comm - the communicators the parallel matrix will live on
3627 .    seqmat - the input sequential matrices
3628 .    m - number of local rows (or PETSC_DECIDE)
3629 .    n - number of local columns (or PETSC_DECIDE)
3630 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3631 
3632    Output Parameter:
3633 .    mpimat - the parallel matrix generated
3634 
3635     Level: advanced
3636 
3637    Notes:
3638      The dimensions of the sequential matrix in each processor MUST be the same.
3639      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3640      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3641 @*/
3642 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3643 {
3644   PetscErrorCode       ierr;
3645   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3646   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3647   PetscMPIInt          size,rank,taga,*len_s;
3648   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3649   PetscInt             proc,m;
3650   PetscInt             **buf_ri,**buf_rj;
3651   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3652   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3653   MPI_Request          *s_waits,*r_waits;
3654   MPI_Status           *status;
3655   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3656   Mat_Merge_SeqsToMPI  *merge;
3657   PetscContainer       container;
3658 
3659   PetscFunctionBegin;
3660   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3661 
3662   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3663   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3664 
3665   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3666   if (container) {
3667     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3668   }
3669   bi     = merge->bi;
3670   bj     = merge->bj;
3671   buf_ri = merge->buf_ri;
3672   buf_rj = merge->buf_rj;
3673 
3674   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3675   owners = merge->rowmap.range;
3676   len_s  = merge->len_s;
3677 
3678   /* send and recv matrix values */
3679   /*-----------------------------*/
3680   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3681   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3682 
3683   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3684   for (proc=0,k=0; proc<size; proc++){
3685     if (!len_s[proc]) continue;
3686     i = owners[proc];
3687     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3688     k++;
3689   }
3690 
3691   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3692   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3693   ierr = PetscFree(status);CHKERRQ(ierr);
3694 
3695   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3696   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3697 
3698   /* insert mat values of mpimat */
3699   /*----------------------------*/
3700   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3701   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3702   nextrow = buf_ri_k + merge->nrecv;
3703   nextai  = nextrow + merge->nrecv;
3704 
3705   for (k=0; k<merge->nrecv; k++){
3706     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3707     nrows = *(buf_ri_k[k]);
3708     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3709     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3710   }
3711 
3712   /* set values of ba */
3713   m = merge->rowmap.n;
3714   for (i=0; i<m; i++) {
3715     arow = owners[rank] + i;
3716     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3717     bnzi = bi[i+1] - bi[i];
3718     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3719 
3720     /* add local non-zero vals of this proc's seqmat into ba */
3721     anzi = ai[arow+1] - ai[arow];
3722     aj   = a->j + ai[arow];
3723     aa   = a->a + ai[arow];
3724     nextaj = 0;
3725     for (j=0; nextaj<anzi; j++){
3726       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3727         ba_i[j] += aa[nextaj++];
3728       }
3729     }
3730 
3731     /* add received vals into ba */
3732     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3733       /* i-th row */
3734       if (i == *nextrow[k]) {
3735         anzi = *(nextai[k]+1) - *nextai[k];
3736         aj   = buf_rj[k] + *(nextai[k]);
3737         aa   = abuf_r[k] + *(nextai[k]);
3738         nextaj = 0;
3739         for (j=0; nextaj<anzi; j++){
3740           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3741             ba_i[j] += aa[nextaj++];
3742           }
3743         }
3744         nextrow[k]++; nextai[k]++;
3745       }
3746     }
3747     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3748   }
3749   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3750   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3751 
3752   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3753   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3754   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3755   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3761 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3762 {
3763   PetscErrorCode       ierr;
3764   Mat                  B_mpi;
3765   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3766   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3767   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3768   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3769   PetscInt             len,proc,*dnz,*onz;
3770   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3771   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3772   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3773   MPI_Status           *status;
3774   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3775   PetscBT              lnkbt;
3776   Mat_Merge_SeqsToMPI  *merge;
3777   PetscContainer       container;
3778 
3779   PetscFunctionBegin;
3780   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3781 
3782   /* make sure it is a PETSc comm */
3783   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3784   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3785   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3786 
3787   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3788   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3789 
3790   /* determine row ownership */
3791   /*---------------------------------------------------------*/
3792   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3793   merge->rowmap.n = m;
3794   merge->rowmap.N = M;
3795   merge->rowmap.bs = 1;
3796   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3797   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3798   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3799 
3800   m      = merge->rowmap.n;
3801   M      = merge->rowmap.N;
3802   owners = merge->rowmap.range;
3803 
3804   /* determine the number of messages to send, their lengths */
3805   /*---------------------------------------------------------*/
3806   len_s  = merge->len_s;
3807 
3808   len = 0;  /* length of buf_si[] */
3809   merge->nsend = 0;
3810   for (proc=0; proc<size; proc++){
3811     len_si[proc] = 0;
3812     if (proc == rank){
3813       len_s[proc] = 0;
3814     } else {
3815       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3816       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3817     }
3818     if (len_s[proc]) {
3819       merge->nsend++;
3820       nrows = 0;
3821       for (i=owners[proc]; i<owners[proc+1]; i++){
3822         if (ai[i+1] > ai[i]) nrows++;
3823       }
3824       len_si[proc] = 2*(nrows+1);
3825       len += len_si[proc];
3826     }
3827   }
3828 
3829   /* determine the number and length of messages to receive for ij-structure */
3830   /*-------------------------------------------------------------------------*/
3831   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3832   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3833 
3834   /* post the Irecv of j-structure */
3835   /*-------------------------------*/
3836   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3837   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3838 
3839   /* post the Isend of j-structure */
3840   /*--------------------------------*/
3841   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3842   sj_waits = si_waits + merge->nsend;
3843 
3844   for (proc=0, k=0; proc<size; proc++){
3845     if (!len_s[proc]) continue;
3846     i = owners[proc];
3847     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3848     k++;
3849   }
3850 
3851   /* receives and sends of j-structure are complete */
3852   /*------------------------------------------------*/
3853   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3854   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3855 
3856   /* send and recv i-structure */
3857   /*---------------------------*/
3858   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3859   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3860 
3861   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3862   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3863   for (proc=0,k=0; proc<size; proc++){
3864     if (!len_s[proc]) continue;
3865     /* form outgoing message for i-structure:
3866          buf_si[0]:                 nrows to be sent
3867                [1:nrows]:           row index (global)
3868                [nrows+1:2*nrows+1]: i-structure index
3869     */
3870     /*-------------------------------------------*/
3871     nrows = len_si[proc]/2 - 1;
3872     buf_si_i    = buf_si + nrows+1;
3873     buf_si[0]   = nrows;
3874     buf_si_i[0] = 0;
3875     nrows = 0;
3876     for (i=owners[proc]; i<owners[proc+1]; i++){
3877       anzi = ai[i+1] - ai[i];
3878       if (anzi) {
3879         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3880         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3881         nrows++;
3882       }
3883     }
3884     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3885     k++;
3886     buf_si += len_si[proc];
3887   }
3888 
3889   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3890   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3891 
3892   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3893   for (i=0; i<merge->nrecv; i++){
3894     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3895   }
3896 
3897   ierr = PetscFree(len_si);CHKERRQ(ierr);
3898   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3899   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3900   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3901   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3902   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3903   ierr = PetscFree(status);CHKERRQ(ierr);
3904 
3905   /* compute a local seq matrix in each processor */
3906   /*----------------------------------------------*/
3907   /* allocate bi array and free space for accumulating nonzero column info */
3908   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3909   bi[0] = 0;
3910 
3911   /* create and initialize a linked list */
3912   nlnk = N+1;
3913   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3914 
3915   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3916   len = 0;
3917   len  = ai[owners[rank+1]] - ai[owners[rank]];
3918   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3919   current_space = free_space;
3920 
3921   /* determine symbolic info for each local row */
3922   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3923   nextrow = buf_ri_k + merge->nrecv;
3924   nextai  = nextrow + merge->nrecv;
3925   for (k=0; k<merge->nrecv; k++){
3926     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3927     nrows = *buf_ri_k[k];
3928     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3929     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3930   }
3931 
3932   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3933   len = 0;
3934   for (i=0;i<m;i++) {
3935     bnzi   = 0;
3936     /* add local non-zero cols of this proc's seqmat into lnk */
3937     arow   = owners[rank] + i;
3938     anzi   = ai[arow+1] - ai[arow];
3939     aj     = a->j + ai[arow];
3940     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3941     bnzi += nlnk;
3942     /* add received col data into lnk */
3943     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3944       if (i == *nextrow[k]) { /* i-th row */
3945         anzi = *(nextai[k]+1) - *nextai[k];
3946         aj   = buf_rj[k] + *nextai[k];
3947         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3948         bnzi += nlnk;
3949         nextrow[k]++; nextai[k]++;
3950       }
3951     }
3952     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3953 
3954     /* if free space is not available, make more free space */
3955     if (current_space->local_remaining<bnzi) {
3956       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3957       nspacedouble++;
3958     }
3959     /* copy data into free space, then initialize lnk */
3960     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3961     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3962 
3963     current_space->array           += bnzi;
3964     current_space->local_used      += bnzi;
3965     current_space->local_remaining -= bnzi;
3966 
3967     bi[i+1] = bi[i] + bnzi;
3968   }
3969 
3970   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3971 
3972   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3973   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3974   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3975 
3976   /* create symbolic parallel matrix B_mpi */
3977   /*---------------------------------------*/
3978   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3979   if (n==PETSC_DECIDE) {
3980     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3981   } else {
3982     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3983   }
3984   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3985   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3986   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3987 
3988   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3989   B_mpi->assembled     = PETSC_FALSE;
3990   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3991   merge->bi            = bi;
3992   merge->bj            = bj;
3993   merge->buf_ri        = buf_ri;
3994   merge->buf_rj        = buf_rj;
3995   merge->coi           = PETSC_NULL;
3996   merge->coj           = PETSC_NULL;
3997   merge->owners_co     = PETSC_NULL;
3998 
3999   /* attach the supporting struct to B_mpi for reuse */
4000   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4001   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4002   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4003   *mpimat = B_mpi;
4004 
4005   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4006   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4007   PetscFunctionReturn(0);
4008 }
4009 
4010 #undef __FUNCT__
4011 #define __FUNCT__ "MatMerge_SeqsToMPI"
4012 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4013 {
4014   PetscErrorCode   ierr;
4015 
4016   PetscFunctionBegin;
4017   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4018   if (scall == MAT_INITIAL_MATRIX){
4019     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4020   }
4021   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4022   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4023   PetscFunctionReturn(0);
4024 }
4025 
4026 #undef __FUNCT__
4027 #define __FUNCT__ "MatGetLocalMat"
4028 /*@
4029      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4030 
4031     Not Collective
4032 
4033    Input Parameters:
4034 +    A - the matrix
4035 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4036 
4037    Output Parameter:
4038 .    A_loc - the local sequential matrix generated
4039 
4040     Level: developer
4041 
4042 @*/
4043 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4044 {
4045   PetscErrorCode  ierr;
4046   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4047   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4048   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4049   PetscScalar     *aa=a->a,*ba=b->a,*ca;
4050   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
4051   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4052 
4053   PetscFunctionBegin;
4054   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4055   if (scall == MAT_INITIAL_MATRIX){
4056     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4057     ci[0] = 0;
4058     for (i=0; i<am; i++){
4059       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4060     }
4061     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4062     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4063     k = 0;
4064     for (i=0; i<am; i++) {
4065       ncols_o = bi[i+1] - bi[i];
4066       ncols_d = ai[i+1] - ai[i];
4067       /* off-diagonal portion of A */
4068       for (jo=0; jo<ncols_o; jo++) {
4069         col = cmap[*bj];
4070         if (col >= cstart) break;
4071         cj[k]   = col; bj++;
4072         ca[k++] = *ba++;
4073       }
4074       /* diagonal portion of A */
4075       for (j=0; j<ncols_d; j++) {
4076         cj[k]   = cstart + *aj++;
4077         ca[k++] = *aa++;
4078       }
4079       /* off-diagonal portion of A */
4080       for (j=jo; j<ncols_o; j++) {
4081         cj[k]   = cmap[*bj++];
4082         ca[k++] = *ba++;
4083       }
4084     }
4085     /* put together the new matrix */
4086     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4087     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4088     /* Since these are PETSc arrays, change flags to free them as necessary. */
4089     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4090     mat->free_a  = PETSC_TRUE;
4091     mat->free_ij = PETSC_TRUE;
4092     mat->nonew   = 0;
4093   } else if (scall == MAT_REUSE_MATRIX){
4094     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4095     ci = mat->i; cj = mat->j; ca = mat->a;
4096     for (i=0; i<am; i++) {
4097       /* off-diagonal portion of A */
4098       ncols_o = bi[i+1] - bi[i];
4099       for (jo=0; jo<ncols_o; jo++) {
4100         col = cmap[*bj];
4101         if (col >= cstart) break;
4102         *ca++ = *ba++; bj++;
4103       }
4104       /* diagonal portion of A */
4105       ncols_d = ai[i+1] - ai[i];
4106       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4107       /* off-diagonal portion of A */
4108       for (j=jo; j<ncols_o; j++) {
4109         *ca++ = *ba++; bj++;
4110       }
4111     }
4112   } else {
4113     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4114   }
4115 
4116   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4117   PetscFunctionReturn(0);
4118 }
4119 
4120 #undef __FUNCT__
4121 #define __FUNCT__ "MatGetLocalMatCondensed"
4122 /*@C
4123      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4124 
4125     Not Collective
4126 
4127    Input Parameters:
4128 +    A - the matrix
4129 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4130 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4131 
4132    Output Parameter:
4133 .    A_loc - the local sequential matrix generated
4134 
4135     Level: developer
4136 
4137 @*/
4138 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4139 {
4140   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4141   PetscErrorCode    ierr;
4142   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4143   IS                isrowa,iscola;
4144   Mat               *aloc;
4145 
4146   PetscFunctionBegin;
4147   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4148   if (!row){
4149     start = A->rmap.rstart; end = A->rmap.rend;
4150     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4151   } else {
4152     isrowa = *row;
4153   }
4154   if (!col){
4155     start = A->cmap.rstart;
4156     cmap  = a->garray;
4157     nzA   = a->A->cmap.n;
4158     nzB   = a->B->cmap.n;
4159     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4160     ncols = 0;
4161     for (i=0; i<nzB; i++) {
4162       if (cmap[i] < start) idx[ncols++] = cmap[i];
4163       else break;
4164     }
4165     imark = i;
4166     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4167     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4168     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4169     ierr = PetscFree(idx);CHKERRQ(ierr);
4170   } else {
4171     iscola = *col;
4172   }
4173   if (scall != MAT_INITIAL_MATRIX){
4174     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4175     aloc[0] = *A_loc;
4176   }
4177   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4178   *A_loc = aloc[0];
4179   ierr = PetscFree(aloc);CHKERRQ(ierr);
4180   if (!row){
4181     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4182   }
4183   if (!col){
4184     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4185   }
4186   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4187   PetscFunctionReturn(0);
4188 }
4189 
4190 #undef __FUNCT__
4191 #define __FUNCT__ "MatGetBrowsOfAcols"
4192 /*@C
4193     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4194 
4195     Collective on Mat
4196 
4197    Input Parameters:
4198 +    A,B - the matrices in mpiaij format
4199 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4200 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4201 
4202    Output Parameter:
4203 +    rowb, colb - index sets of rows and columns of B to extract
4204 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
4205 -    B_seq - the sequential matrix generated
4206 
4207     Level: developer
4208 
4209 @*/
4210 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4211 {
4212   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4213   PetscErrorCode    ierr;
4214   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4215   IS                isrowb,iscolb;
4216   Mat               *bseq;
4217 
4218   PetscFunctionBegin;
4219   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4220     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4221   }
4222   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4223 
4224   if (scall == MAT_INITIAL_MATRIX){
4225     start = A->cmap.rstart;
4226     cmap  = a->garray;
4227     nzA   = a->A->cmap.n;
4228     nzB   = a->B->cmap.n;
4229     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4230     ncols = 0;
4231     for (i=0; i<nzB; i++) {  /* row < local row index */
4232       if (cmap[i] < start) idx[ncols++] = cmap[i];
4233       else break;
4234     }
4235     imark = i;
4236     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4237     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4238     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4239     ierr = PetscFree(idx);CHKERRQ(ierr);
4240     *brstart = imark;
4241     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4242   } else {
4243     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4244     isrowb = *rowb; iscolb = *colb;
4245     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4246     bseq[0] = *B_seq;
4247   }
4248   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4249   *B_seq = bseq[0];
4250   ierr = PetscFree(bseq);CHKERRQ(ierr);
4251   if (!rowb){
4252     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4253   } else {
4254     *rowb = isrowb;
4255   }
4256   if (!colb){
4257     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4258   } else {
4259     *colb = iscolb;
4260   }
4261   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4262   PetscFunctionReturn(0);
4263 }
4264 
4265 #undef __FUNCT__
4266 #define __FUNCT__ "MatGetBrowsOfAoCols"
4267 /*@C
4268     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4269     of the OFF-DIAGONAL portion of local A
4270 
4271     Collective on Mat
4272 
4273    Input Parameters:
4274 +    A,B - the matrices in mpiaij format
4275 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4276 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4277 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4278 
4279    Output Parameter:
4280 +    B_oth - the sequential matrix generated
4281 
4282     Level: developer
4283 
4284 @*/
4285 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4286 {
4287   VecScatter_MPI_General *gen_to,*gen_from;
4288   PetscErrorCode         ierr;
4289   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4290   Mat_SeqAIJ             *b_oth;
4291   VecScatter             ctx=a->Mvctx;
4292   MPI_Comm               comm=((PetscObject)ctx)->comm;
4293   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4294   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4295   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4296   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4297   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4298   MPI_Status             *sstatus,rstatus;
4299   PetscMPIInt            jj;
4300   PetscInt               *cols,sbs,rbs;
4301   PetscScalar            *vals;
4302 
4303   PetscFunctionBegin;
4304   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4305     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4306   }
4307   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4308   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4309 
4310   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4311   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4312   rvalues  = gen_from->values; /* holds the length of receiving row */
4313   svalues  = gen_to->values;   /* holds the length of sending row */
4314   nrecvs   = gen_from->n;
4315   nsends   = gen_to->n;
4316 
4317   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4318   srow     = gen_to->indices;   /* local row index to be sent */
4319   sstarts  = gen_to->starts;
4320   sprocs   = gen_to->procs;
4321   sstatus  = gen_to->sstatus;
4322   sbs      = gen_to->bs;
4323   rstarts  = gen_from->starts;
4324   rprocs   = gen_from->procs;
4325   rbs      = gen_from->bs;
4326 
4327   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4328   if (scall == MAT_INITIAL_MATRIX){
4329     /* i-array */
4330     /*---------*/
4331     /*  post receives */
4332     for (i=0; i<nrecvs; i++){
4333       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4334       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4335       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4336     }
4337 
4338     /* pack the outgoing message */
4339     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4340     rstartsj = sstartsj + nsends +1;
4341     sstartsj[0] = 0;  rstartsj[0] = 0;
4342     len = 0; /* total length of j or a array to be sent */
4343     k = 0;
4344     for (i=0; i<nsends; i++){
4345       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4346       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4347       for (j=0; j<nrows; j++) {
4348         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4349         for (l=0; l<sbs; l++){
4350           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4351           rowlen[j*sbs+l] = ncols;
4352           len += ncols;
4353           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4354         }
4355         k++;
4356       }
4357       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4358       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4359     }
4360     /* recvs and sends of i-array are completed */
4361     i = nrecvs;
4362     while (i--) {
4363       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4364     }
4365     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4366 
4367     /* allocate buffers for sending j and a arrays */
4368     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4369     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4370 
4371     /* create i-array of B_oth */
4372     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4373     b_othi[0] = 0;
4374     len = 0; /* total length of j or a array to be received */
4375     k = 0;
4376     for (i=0; i<nrecvs; i++){
4377       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4378       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4379       for (j=0; j<nrows; j++) {
4380         b_othi[k+1] = b_othi[k] + rowlen[j];
4381         len += rowlen[j]; k++;
4382       }
4383       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4384     }
4385 
4386     /* allocate space for j and a arrrays of B_oth */
4387     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4388     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4389 
4390     /* j-array */
4391     /*---------*/
4392     /*  post receives of j-array */
4393     for (i=0; i<nrecvs; i++){
4394       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4395       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4396     }
4397 
4398     /* pack the outgoing message j-array */
4399     k = 0;
4400     for (i=0; i<nsends; i++){
4401       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4402       bufJ = bufj+sstartsj[i];
4403       for (j=0; j<nrows; j++) {
4404         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4405         for (ll=0; ll<sbs; ll++){
4406           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4407           for (l=0; l<ncols; l++){
4408             *bufJ++ = cols[l];
4409           }
4410           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4411         }
4412       }
4413       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4414     }
4415 
4416     /* recvs and sends of j-array are completed */
4417     i = nrecvs;
4418     while (i--) {
4419       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4420     }
4421     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4422   } else if (scall == MAT_REUSE_MATRIX){
4423     sstartsj = *startsj;
4424     rstartsj = sstartsj + nsends +1;
4425     bufa     = *bufa_ptr;
4426     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4427     b_otha   = b_oth->a;
4428   } else {
4429     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4430   }
4431 
4432   /* a-array */
4433   /*---------*/
4434   /*  post receives of a-array */
4435   for (i=0; i<nrecvs; i++){
4436     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4437     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4438   }
4439 
4440   /* pack the outgoing message a-array */
4441   k = 0;
4442   for (i=0; i<nsends; i++){
4443     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4444     bufA = bufa+sstartsj[i];
4445     for (j=0; j<nrows; j++) {
4446       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4447       for (ll=0; ll<sbs; ll++){
4448         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4449         for (l=0; l<ncols; l++){
4450           *bufA++ = vals[l];
4451         }
4452         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4453       }
4454     }
4455     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4456   }
4457   /* recvs and sends of a-array are completed */
4458   i = nrecvs;
4459   while (i--) {
4460     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4461   }
4462   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4463   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4464 
4465   if (scall == MAT_INITIAL_MATRIX){
4466     /* put together the new matrix */
4467     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4468 
4469     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4470     /* Since these are PETSc arrays, change flags to free them as necessary. */
4471     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4472     b_oth->free_a  = PETSC_TRUE;
4473     b_oth->free_ij = PETSC_TRUE;
4474     b_oth->nonew   = 0;
4475 
4476     ierr = PetscFree(bufj);CHKERRQ(ierr);
4477     if (!startsj || !bufa_ptr){
4478       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4479       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4480     } else {
4481       *startsj  = sstartsj;
4482       *bufa_ptr = bufa;
4483     }
4484   }
4485   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4486   PetscFunctionReturn(0);
4487 }
4488 
4489 #undef __FUNCT__
4490 #define __FUNCT__ "MatGetCommunicationStructs"
4491 /*@C
4492   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4493 
4494   Not Collective
4495 
4496   Input Parameters:
4497 . A - The matrix in mpiaij format
4498 
4499   Output Parameter:
4500 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4501 . colmap - A map from global column index to local index into lvec
4502 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4503 
4504   Level: developer
4505 
4506 @*/
4507 #if defined (PETSC_USE_CTABLE)
4508 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4509 #else
4510 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4511 #endif
4512 {
4513   Mat_MPIAIJ *a;
4514 
4515   PetscFunctionBegin;
4516   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4517   PetscValidPointer(lvec, 2)
4518   PetscValidPointer(colmap, 3)
4519   PetscValidPointer(multScatter, 4)
4520   a = (Mat_MPIAIJ *) A->data;
4521   if (lvec) *lvec = a->lvec;
4522   if (colmap) *colmap = a->colmap;
4523   if (multScatter) *multScatter = a->Mvctx;
4524   PetscFunctionReturn(0);
4525 }
4526 
4527 EXTERN_C_BEGIN
4528 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4529 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4530 EXTERN_C_END
4531 
4532 /*MC
4533    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4534 
4535    Options Database Keys:
4536 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4537 
4538   Level: beginner
4539 
4540 .seealso: MatCreateMPIAIJ()
4541 M*/
4542 
4543 EXTERN_C_BEGIN
4544 #undef __FUNCT__
4545 #define __FUNCT__ "MatCreate_MPIAIJ"
4546 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4547 {
4548   Mat_MPIAIJ     *b;
4549   PetscErrorCode ierr;
4550   PetscMPIInt    size;
4551 
4552   PetscFunctionBegin;
4553   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4554 
4555   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4556   B->data         = (void*)b;
4557   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4558   B->factor       = 0;
4559   B->rmap.bs      = 1;
4560   B->assembled    = PETSC_FALSE;
4561   B->mapping      = 0;
4562 
4563   B->insertmode      = NOT_SET_VALUES;
4564   b->size            = size;
4565   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4566 
4567   /* build cache for off array entries formed */
4568   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4569   b->donotstash  = PETSC_FALSE;
4570   b->colmap      = 0;
4571   b->garray      = 0;
4572   b->roworiented = PETSC_TRUE;
4573 
4574   /* stuff used for matrix vector multiply */
4575   b->lvec      = PETSC_NULL;
4576   b->Mvctx     = PETSC_NULL;
4577 
4578   /* stuff for MatGetRow() */
4579   b->rowindices   = 0;
4580   b->rowvalues    = 0;
4581   b->getrowactive = PETSC_FALSE;
4582 
4583 
4584   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4585                                      "MatStoreValues_MPIAIJ",
4586                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4587   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4588                                      "MatRetrieveValues_MPIAIJ",
4589                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4590   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4591 				     "MatGetDiagonalBlock_MPIAIJ",
4592                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4593   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4594 				     "MatIsTranspose_MPIAIJ",
4595 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4596   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4597 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4598 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4599   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4600 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4601 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4603 				     "MatDiagonalScaleLocal_MPIAIJ",
4604 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4606                                      "MatConvert_MPIAIJ_MPICSRPERM",
4607                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4609                                      "MatConvert_MPIAIJ_MPICRL",
4610                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4611   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4612   PetscFunctionReturn(0);
4613 }
4614 EXTERN_C_END
4615 
4616 #undef __FUNCT__
4617 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4618 /*@
4619      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4620          and "off-diagonal" part of the matrix in CSR format.
4621 
4622    Collective on MPI_Comm
4623 
4624    Input Parameters:
4625 +  comm - MPI communicator
4626 .  m - number of local rows (Cannot be PETSC_DECIDE)
4627 .  n - This value should be the same as the local size used in creating the
4628        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4629        calculated if N is given) For square matrices n is almost always m.
4630 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4631 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4632 .   i - row indices for "diagonal" portion of matrix
4633 .   j - column indices
4634 .   a - matrix values
4635 .   oi - row indices for "off-diagonal" portion of matrix
4636 .   oj - column indices
4637 -   oa - matrix values
4638 
4639    Output Parameter:
4640 .   mat - the matrix
4641 
4642    Level: advanced
4643 
4644    Notes:
4645        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4646 
4647        The i and j indices are 0 based
4648 
4649        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4650 
4651 
4652 .keywords: matrix, aij, compressed row, sparse, parallel
4653 
4654 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4655           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4656 @*/
4657 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4658 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4659 {
4660   PetscErrorCode ierr;
4661   Mat_MPIAIJ     *maij;
4662 
4663  PetscFunctionBegin;
4664   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4665   if (i[0]) {
4666     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4667   }
4668   if (oi[0]) {
4669     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4670   }
4671   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4672   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4673   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4674   maij = (Mat_MPIAIJ*) (*mat)->data;
4675   maij->donotstash     = PETSC_TRUE;
4676   (*mat)->preallocated = PETSC_TRUE;
4677 
4678   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4679   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4680   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4681 
4682   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4683   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4684 
4685   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4686   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4687   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4688   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4689 
4690   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4691   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4692   PetscFunctionReturn(0);
4693 }
4694 
4695 /*
4696     Special version for direct calls from Fortran
4697 */
4698 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4699 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4700 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4701 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4702 #endif
4703 
4704 /* Change these macros so can be used in void function */
4705 #undef CHKERRQ
4706 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4707 #undef SETERRQ2
4708 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4709 #undef SETERRQ
4710 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4711 
4712 EXTERN_C_BEGIN
4713 #undef __FUNCT__
4714 #define __FUNCT__ "matsetvaluesmpiaij_"
4715 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4716 {
4717   Mat            mat = *mmat;
4718   PetscInt       m = *mm, n = *mn;
4719   InsertMode     addv = *maddv;
4720   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4721   PetscScalar    value;
4722   PetscErrorCode ierr;
4723 
4724   MatPreallocated(mat);
4725   if (mat->insertmode == NOT_SET_VALUES) {
4726     mat->insertmode = addv;
4727   }
4728 #if defined(PETSC_USE_DEBUG)
4729   else if (mat->insertmode != addv) {
4730     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4731   }
4732 #endif
4733   {
4734   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4735   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4736   PetscTruth     roworiented = aij->roworiented;
4737 
4738   /* Some Variables required in the macro */
4739   Mat            A = aij->A;
4740   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4741   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4742   PetscScalar    *aa = a->a;
4743   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4744   Mat            B = aij->B;
4745   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4746   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4747   PetscScalar    *ba = b->a;
4748 
4749   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4750   PetscInt       nonew = a->nonew;
4751   PetscScalar    *ap1,*ap2;
4752 
4753   PetscFunctionBegin;
4754   for (i=0; i<m; i++) {
4755     if (im[i] < 0) continue;
4756 #if defined(PETSC_USE_DEBUG)
4757     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4758 #endif
4759     if (im[i] >= rstart && im[i] < rend) {
4760       row      = im[i] - rstart;
4761       lastcol1 = -1;
4762       rp1      = aj + ai[row];
4763       ap1      = aa + ai[row];
4764       rmax1    = aimax[row];
4765       nrow1    = ailen[row];
4766       low1     = 0;
4767       high1    = nrow1;
4768       lastcol2 = -1;
4769       rp2      = bj + bi[row];
4770       ap2      = ba + bi[row];
4771       rmax2    = bimax[row];
4772       nrow2    = bilen[row];
4773       low2     = 0;
4774       high2    = nrow2;
4775 
4776       for (j=0; j<n; j++) {
4777         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4778         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4779         if (in[j] >= cstart && in[j] < cend){
4780           col = in[j] - cstart;
4781           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4782         } else if (in[j] < 0) continue;
4783 #if defined(PETSC_USE_DEBUG)
4784         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4785 #endif
4786         else {
4787           if (mat->was_assembled) {
4788             if (!aij->colmap) {
4789               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4790             }
4791 #if defined (PETSC_USE_CTABLE)
4792             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4793 	    col--;
4794 #else
4795             col = aij->colmap[in[j]] - 1;
4796 #endif
4797             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4798               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4799               col =  in[j];
4800               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4801               B = aij->B;
4802               b = (Mat_SeqAIJ*)B->data;
4803               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4804               rp2      = bj + bi[row];
4805               ap2      = ba + bi[row];
4806               rmax2    = bimax[row];
4807               nrow2    = bilen[row];
4808               low2     = 0;
4809               high2    = nrow2;
4810               bm       = aij->B->rmap.n;
4811               ba = b->a;
4812             }
4813           } else col = in[j];
4814           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4815         }
4816       }
4817     } else {
4818       if (!aij->donotstash) {
4819         if (roworiented) {
4820           if (ignorezeroentries && v[i*n] == 0.0) continue;
4821           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4822         } else {
4823           if (ignorezeroentries && v[i] == 0.0) continue;
4824           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4825         }
4826       }
4827     }
4828   }}
4829   PetscFunctionReturnVoid();
4830 }
4831 EXTERN_C_END
4832 
4833