/* petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 71f1c65dd82776e09714a368b7ddcb2e1f403c48) */
#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
stores an order N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
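
/*
   Lookup sketch (illustrative only, mirroring the consumers below such as
   MatSetValues_MPIAIJ and MatGetValues_MPIAIJ): translate a global column
   gcol into a local column of the off-diagonal block B. Entries are stored
   shifted by one so that 0 can mean "not present"; after shifting back, a
   result of -1 means gcol does not occur in B on this process.

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/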

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
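
/*
   Both macros above implement the same insertion scheme for one entry of a
   SeqAIJ row: a short binary search narrows [low,high) while the gap exceeds
   five entries, a linear scan then locates the column, and on a miss the
   entry is inserted, reallocating via MatSeqXAIJReallocateAIJ() if the row is
   full and shifting the tail of the row up by one slot. The lastcol test
   exploits the common case of callers supplying columns in increasing order.
*/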

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
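
/*
   Layout sketch (illustrative): for local row r the caller's v[] must hold
   the entire row in global column order, which the memcpys above split as

     [ B values left of diagonal block | A (diagonal block) values | remaining B values ]
       l entries                         a->i[r+1]-a->i[r] entries   b->i[r+1]-b->i[r]-l entries
*/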

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 note ba must be refreshed before ap2 is computed since DisAssemble replaced B */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
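
/*
   Usage sketch (illustrative, not part of this file): insert a 2x2 block of
   values by global index; rows owned by other processes are stashed here and
   communicated during assembly.

     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vals[4] = {4.0,-1.0,-1.0,4.0};   (row-oriented by default)
     ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/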

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}
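
/*
   Assembly thus proceeds in two phases: MatAssemblyBegin_MPIAIJ() ships the
   stashed off-process entries, and MatAssemblyEnd_MPIAIJ() drains them into
   local MatSetValues_MPIAIJ() calls, assembles the diagonal block A, agrees
   globally on disassembly (the MPI_PROD of was_assembled is zero iff some
   process disassembled), and finally assembles the off-diagonal block B with
   compressed-row storage, since many of its rows are typically empty.
*/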

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_USE_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_USE_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_USE_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, thus calling lots of mallocs and slowing
     things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
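
/*
   Usage sketch (illustrative): each process may list any global rows,
   including ones it does not own; ownership is resolved by the send/receive
   pattern above.

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);   (zero rows 0 and 5, put 1.0 on their diagonal)
*/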

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
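
/*
   The multiply overlaps communication with computation: y = A*x_local is
   computed from the diagonal block while the scatter gathers the ghost
   values into lvec, after which y += B*lvec adds the off-diagonal
   contribution.
*/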

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr); /* ISCreateGeneral() copied the indices, so this is safe to free */

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs as much space as the process with the most nonzeros */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
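
/*
   On-disk layout written above (PETSc binary matrix format):

     header[4]      = { MAT_FILE_COOKIE, M, N, global nnz }
     row lengths    : M PetscInt values, one per global row
     column indices : nnz PetscInt values, rows in order, columns sorted within each row
     values         : nnz PetscScalar values in the same order as the indices
*/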

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
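
/*
   Each outer iteration applies a local (processor-block) sweep to an
   updated right-hand side:

      bb1 = bb - B * x_ghost,   then   x <- SOR(A, bb1, omega, ...)

   so only the diagonal block A is relaxed; true parallel SOR across
   process boundaries is not supported.
*/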

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  /* lcolp must be marked as a permutation before it can be inverted */
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1329 
1330 #undef __FUNCT__
1331 #define __FUNCT__ "MatNorm_MPIAIJ"
1332 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1333 {
1334   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1335   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1336   PetscErrorCode ierr;
1337   PetscInt       i,j,cstart = mat->cmap.rstart;
1338   PetscReal      sum = 0.0;
1339   PetscScalar    *v;
1340 
1341   PetscFunctionBegin;
1342   if (aij->size == 1) {
1343     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1344   } else {
1345     if (type == NORM_FROBENIUS) {
1346       v = amat->a;
1347       for (i=0; i<amat->nz; i++) {
1348 #if defined(PETSC_USE_COMPLEX)
1349         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1350 #else
1351         sum += (*v)*(*v); v++;
1352 #endif
1353       }
1354       v = bmat->a;
1355       for (i=0; i<bmat->nz; i++) {
1356 #if defined(PETSC_USE_COMPLEX)
1357         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1358 #else
1359         sum += (*v)*(*v); v++;
1360 #endif
1361       }
1362       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1363       *norm = sqrt(*norm);
1364     } else if (type == NORM_1) { /* max column norm */
1365       PetscReal *tmp,*tmp2;
1366       PetscInt    *jj,*garray = aij->garray;
1367       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1368       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1369       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1370       *norm = 0.0;
1371       v = amat->a; jj = amat->j;
1372       for (j=0; j<amat->nz; j++) {
1373         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1374       }
1375       v = bmat->a; jj = bmat->j;
1376       for (j=0; j<bmat->nz; j++) {
1377         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1378       }
1379       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1380       for (j=0; j<mat->cmap.N; j++) {
1381         if (tmp2[j] > *norm) *norm = tmp2[j];
1382       }
1383       ierr = PetscFree(tmp);CHKERRQ(ierr);
1384       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1385     } else if (type == NORM_INFINITY) { /* max row norm */
1386       PetscReal ntemp = 0.0;
1387       for (j=0; j<aij->A->rmap.n; j++) {
1388         v = amat->a + amat->i[j];
1389         sum = 0.0;
1390         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1391           sum += PetscAbsScalar(*v); v++;
1392         }
1393         v = bmat->a + bmat->i[j];
1394         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1395           sum += PetscAbsScalar(*v); v++;
1396         }
1397         if (sum > ntemp) ntemp = sum;
1398       }
1399       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1400     } else {
1401       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1402     }
1403   }
1404   PetscFunctionReturn(0);
1405 }
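
/*
   Illustrative sketch (not part of the original source): querying the three
   norms supported by MatNorm_MPIAIJ() above; NORM_2 would trigger the
   PETSC_ERR_SUP branch. The helper name is hypothetical.
*/
static PetscErrorCode ExampleNorms_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      nrm1,nrmf,nrminf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);          /* max column sum */
  ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);  /* sqrt of the sum of squares */
  ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr); /* max row sum */
  ierr = PetscPrintf(A->comm,"norms: %G %G %G\n",nrm1,nrmf,nrminf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}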
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatTranspose_MPIAIJ"
1409 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1410 {
1411   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1412   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1413   PetscErrorCode ierr;
1414   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1415   PetscInt       cstart=A->cmap.rstart,ncol;
1416   Mat            B;
1417   PetscScalar    *array;
1418 
1419   PetscFunctionBegin;
1420   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1421 
1422   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1423   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1424   ai = Aloc->i; aj = Aloc->j;
1425   bi = Bloc->i; bj = Bloc->j;
1426   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1427   cols = d_nnz + na + 1; /* work space to be used by B part */
1428   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1429   for (i=0; i<ai[ma]; i++){
1430     d_nnz[aj[i]] ++;
1431     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1432   }
1433 
1434   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1435   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1436   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1437   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1438 
1439   /* copy over the A part */
1440   array = Aloc->a;
1441   row = A->rmap.rstart;
1442   for (i=0; i<ma; i++) {
1443     ncol = ai[i+1]-ai[i];
1444     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1445     row++; array += ncol; aj += ncol;
1446   }
1447   aj = Aloc->j;
1448   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1449 
1450   /* copy over the B part */
1451   array = Bloc->a;
1452   row = A->rmap.rstart;
1453   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1454   for (i=0; i<mb; i++) {
1455     ncol = bi[i+1]-bi[i];
1456     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1457     row++; array += ncol; cols += ncol;
1458   }
1459   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1460   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1462   if (matout) {
1463     *matout = B;
1464   } else {
1465     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1466   }
1467   PetscFunctionReturn(0);
1468 }
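
/*
   Illustrative sketch (not part of the original source): the two calling modes
   of MatTranspose() handled above: out-of-place into a new matrix, and
   in-place (matout == PETSC_NULL), which the check above restricts to square
   matrices. The helper name is hypothetical.
*/
static PetscErrorCode ExampleTranspose_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;
  Mat            At;

  PetscFunctionBegin;
  ierr = MatTranspose(A,&At);CHKERRQ(ierr);        /* At = A^T as a new matrix */
  ierr = MatDestroy(At);CHKERRQ(ierr);
  ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr); /* in-place; A must be square */
  PetscFunctionReturn(0);
}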
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1472 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1473 {
1474   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1475   Mat            a = aij->A,b = aij->B;
1476   PetscErrorCode ierr;
1477   PetscInt       s1,s2,s3;
1478 
1479   PetscFunctionBegin;
1480   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1481   if (rr) {
1482     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1483     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1484     /* Overlap communication with computation. */
1485     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486   }
1487   if (ll) {
1488     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1489     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1490     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1491   }
1492   /* scale the diagonal block */
1493   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1494 
1495   if (rr) {
1496     /* Do a scatter end and then right scale the off-diagonal block */
1497     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1498     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1499   }
1500 
1501   PetscFunctionReturn(0);
1502 }
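
/*
   Illustrative sketch (not part of the original source): scaling a matrix to
   diag(ll)*A*diag(rr) with MatDiagonalScale(). As checked above, the left
   vector must conform to the local row layout and the right vector to the
   local column layout; passing PETSC_NULL skips that side. The helper name is
   hypothetical.
*/
static PetscErrorCode ExampleDiagonalScale_MPIAIJ(Mat A,Vec ll,Vec rr)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);         /* A <- diag(ll) A diag(rr) */
  ierr = MatDiagonalScale(A,PETSC_NULL,rr);CHKERRQ(ierr); /* right scaling only */
  PetscFunctionReturn(0);
}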
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1506 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1507 {
1508   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1509   PetscErrorCode ierr;
1510 
1511   PetscFunctionBegin;
1512   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1513   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1514   PetscFunctionReturn(0);
1515 }
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1518 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1519 {
1520   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatEqual_MPIAIJ"
1530 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1531 {
1532   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1533   Mat            a,b,c,d;
1534   PetscTruth     flg;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   a = matA->A; b = matA->B;
1539   c = matB->A; d = matB->B;
1540 
1541   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1542   if (flg) {
1543     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1544   }
1545   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatCopy_MPIAIJ"
1551 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1552 {
1553   PetscErrorCode ierr;
1554   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1555   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1556 
1557   PetscFunctionBegin;
1558   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1559   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1560     /* Because of the column compression in the off-processor part of the matrix a->B,
1561        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1562        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1563        could be provided by first uncompressing the a->B matrices and then copying the
1564        submatrices */
1565     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1566   } else {
1567     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1568     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1569   }
1570   PetscFunctionReturn(0);
1571 }
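
/*
   Illustrative sketch (not part of the original source): MatCopy() usage
   corresponding to the two branches above. SAME_NONZERO_PATTERN copies the
   diagonal and off-diagonal blocks directly; any other structure flag falls
   back to MatCopy_Basic(). The helper name is hypothetical; B must have the
   same sizes as A.
*/
static PetscErrorCode ExampleCopy_MPIAIJ(Mat A,Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);      /* fast block-wise path */
  ierr = MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* MatCopy_Basic() path */
  PetscFunctionReturn(0);
}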
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1575 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1576 {
1577   PetscErrorCode ierr;
1578 
1579   PetscFunctionBegin;
1580   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #include "petscblaslapack.h"
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatAXPY_MPIAIJ"
1587 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1588 {
1589   PetscErrorCode ierr;
1590   PetscInt       i;
1591   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1592   PetscBLASInt   bnz,one=1;
1593   Mat_SeqAIJ     *x,*y;
1594 
1595   PetscFunctionBegin;
1596   if (str == SAME_NONZERO_PATTERN) {
1597     PetscScalar alpha = a;
1598     x = (Mat_SeqAIJ *)xx->A->data;
1599     y = (Mat_SeqAIJ *)yy->A->data;
1600     bnz = (PetscBLASInt)x->nz;
1601     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1602     x = (Mat_SeqAIJ *)xx->B->data;
1603     y = (Mat_SeqAIJ *)yy->B->data;
1604     bnz = (PetscBLASInt)x->nz;
1605     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1606   } else if (str == SUBSET_NONZERO_PATTERN) {
1607     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1608 
1609     x = (Mat_SeqAIJ *)xx->B->data;
1610     y = (Mat_SeqAIJ *)yy->B->data;
1611     if (y->xtoy && y->XtoY != xx->B) {
1612       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1613       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1614     }
1615     if (!y->xtoy) { /* get xtoy */
1616       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1617       y->XtoY = xx->B;
1618       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1619     }
1620     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1621   } else {
1622     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1623   }
1624   PetscFunctionReturn(0);
1625 }
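
/*
   Illustrative sketch (not part of the original source): Y <- a*X + Y via
   MatAXPY() for the three structure flags handled above. SAME_NONZERO_PATTERN
   reduces to a BLAS axpy on the stored values, SUBSET_NONZERO_PATTERN builds
   and caches the xtoy index map, and anything else falls back to
   MatAXPY_Basic(). The helper name is hypothetical.
*/
static PetscErrorCode ExampleAXPY_MPIAIJ(Mat Y,Mat X)
{
  PetscErrorCode ierr;
  PetscScalar    a = 2.0;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAXPY(Y,a,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAXPY(Y,a,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}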
1626 
1627 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1628 
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatConjugate_MPIAIJ"
1631 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1632 {
1633 #if defined(PETSC_USE_COMPLEX)
1634   PetscErrorCode ierr;
1635   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1636 
1637   PetscFunctionBegin;
1638   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1639   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1640 #else
1641   PetscFunctionBegin;
1642 #endif
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 #undef __FUNCT__
1647 #define __FUNCT__ "MatRealPart_MPIAIJ"
1648 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1649 {
1650   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1651   PetscErrorCode ierr;
1652 
1653   PetscFunctionBegin;
1654   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1655   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 #undef __FUNCT__
1660 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1661 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1662 {
1663   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1664   PetscErrorCode ierr;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1668   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 #ifdef PETSC_HAVE_PBGL
1673 
1674 #include <boost/parallel/mpi/bsp_process_group.hpp>
1675 #include <boost/graph/distributed/ilu_default_graph.hpp>
1676 #include <boost/graph/distributed/ilu_0_block.hpp>
1677 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1678 #include <boost/graph/distributed/petsc/interface.hpp>
1679 #include <boost/multi_array.hpp>
1680 #include <boost/parallel/distributed_property_map.hpp>
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1684 /*
1685   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1686 */
1687 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1688 {
1689   namespace petsc = boost::distributed::petsc;
1690 
1691   namespace graph_dist = boost::graph::distributed;
1692   using boost::graph::distributed::ilu_default::process_group_type;
1693   using boost::graph::ilu_permuted;
1694 
1695   PetscTruth      row_identity, col_identity;
1696   PetscContainer  c;
1697   PetscInt        m, n, M, N;
1698   PetscErrorCode  ierr;
1699 
1700   PetscFunctionBegin;
1701   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1702   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1703   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1704   if (!row_identity || !col_identity) {
1705     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1706   }
1707 
1708   process_group_type pg;
1709   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1710   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1711   lgraph_type&   level_graph = *lgraph_p;
1712   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1713 
1714   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1715   ilu_permuted(level_graph);
1716 
1717   /* put together the new matrix */
1718   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1719   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1720   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1721   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1722   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1723   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1725   (*fact)->factor = FACTOR_LU;
1726 
1727   ierr = PetscContainerCreate(A->comm, &c);CHKERRQ(ierr);
1728   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1729   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1730   PetscFunctionReturn(0);
1731 }
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1735 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1736 {
1737   PetscFunctionBegin;
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatSolve_MPIAIJ"
1743 /*
1744   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1745 */
1746 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1747 {
1748   namespace graph_dist = boost::graph::distributed;
1749 
1750   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1751   lgraph_type*   lgraph_p;
1752   PetscContainer c;
1753   PetscErrorCode ierr;
1754 
1755   PetscFunctionBegin;
1756   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1757   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1758   ierr = VecCopy(b, x); CHKERRQ(ierr);
1759 
1760   PetscScalar* array_x;
1761   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1762   PetscInt sx;
1763   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1764 
1765   PetscScalar* array_b;
1766   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1767   PetscInt sb;
1768   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1769 
1770   lgraph_type&   level_graph = *lgraph_p;
1771   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1772 
1773   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1774   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1775                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1776 
1777   typedef boost::iterator_property_map<array_ref_type::iterator,
1778                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1779   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1780                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1781 
1782   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1783   ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 #endif
1787 
1788 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1789   PetscInt       nzlocal,nsends,nrecvs;
1790   PetscMPIInt    *send_rank;
1791   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1792   PetscScalar    *sbuf_a,**rbuf_a;
1793   PetscErrorCode (*MatDestroy)(Mat);
1794 } Mat_Redundant;
1795 
1796 #undef __FUNCT__
1797 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1798 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1799 {
1800   PetscErrorCode       ierr;
1801   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1802   PetscInt             i;
1803 
1804   PetscFunctionBegin;
1805   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1806   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1807   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1808   for (i=0; i<redund->nrecvs; i++){
1809     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1810     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1811   }
1812   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1813   ierr = PetscFree(redund);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatDestroy_MatRedundant"
1819 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1820 {
1821   PetscErrorCode  ierr;
1822   PetscContainer  container;
1823   Mat_Redundant   *redund=PETSC_NULL;
1824 
1825   PetscFunctionBegin;
1826   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1827   if (container) {
1828     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1829   } else {
1830     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1831   }
1832   A->ops->destroy = redund->MatDestroy;
1833   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1834   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1835   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1841 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1842 {
1843   PetscMPIInt    rank,size;
1844   MPI_Comm       comm=mat->comm;
1845   PetscErrorCode ierr;
1846   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1847   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1848   PetscInt       *rowrange=mat->rmap.range;
1849   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1850   Mat            A=aij->A,B=aij->B,C=*matredundant;
1851   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1852   PetscScalar    *sbuf_a;
1853   PetscInt       nzlocal=a->nz+b->nz;
1854   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1855   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1856   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1857   PetscScalar    *vals,*aworkA,*aworkB;
1858   PetscMPIInt    tag1,tag2,tag3,imdex;
1859   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1860                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1861   MPI_Status     recv_status,*send_status;
1862   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1863   PetscInt       **rbuf_j=PETSC_NULL;
1864   PetscScalar    **rbuf_a=PETSC_NULL;
1865   Mat_Redundant  *redund=PETSC_NULL;
1866   PetscContainer container;
1867 
1868   PetscFunctionBegin;
1869   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1870   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1871 
1872   if (reuse == MAT_REUSE_MATRIX) {
1873     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1874     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1875     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1876     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1877     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1878     if (container) {
1879       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1880     } else {
1881       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1882     }
1883     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1884 
1885     nsends    = redund->nsends;
1886     nrecvs    = redund->nrecvs;
1887     send_rank = redund->send_rank; recv_rank = send_rank + size;
1888     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1889     sbuf_j    = redund->sbuf_j;
1890     sbuf_a    = redund->sbuf_a;
1891     rbuf_j    = redund->rbuf_j;
1892     rbuf_a    = redund->rbuf_a;
1893   }
1894 
1895   if (reuse == MAT_INITIAL_MATRIX){
1896     PetscMPIInt  subrank,subsize;
1897     PetscInt     nleftover,np_subcomm;
1898     /* get the destination processors' id send_rank, nsends and nrecvs */
1899     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1900     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1901     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
1902     recv_rank = send_rank + size;
1903     np_subcomm = size/nsubcomm;
1904     nleftover  = size - nsubcomm*np_subcomm;
1905     nsends = 0; nrecvs = 0;
1906     for (i=0; i<size; i++){ /* i=rank*/
1907       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1908         send_rank[nsends] = i; nsends++;
1909         recv_rank[nrecvs++] = i;
1910       }
1911     }
1912     if (rank >= size - nleftover){/* this proc is a leftover processor */
1913       i = size-nleftover-1;
1914       j = 0;
1915       while (j < nsubcomm - nleftover){
1916         send_rank[nsends++] = i;
1917         i--; j++;
1918       }
1919     }
1920 
1921     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1922       for (i=0; i<nleftover; i++){
1923         recv_rank[nrecvs++] = size-nleftover+i;
1924       }
1925     }
1926 
1927     /* allocate sbuf_j, sbuf_a */
1928     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1929     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1930     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1931   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1932 
1933   /* copy mat's local entries into the buffers */
1934   if (reuse == MAT_INITIAL_MATRIX){
1935     rownz_max = 0;
1936     rptr = sbuf_j;
1937     cols = sbuf_j + rend-rstart + 1;
1938     vals = sbuf_a;
1939     rptr[0] = 0;
1940     for (i=0; i<rend-rstart; i++){
1941       row = i + rstart;
1942       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1943       ncols  = nzA + nzB;
1944       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1945       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1946       /* load the column indices for this row into cols */
1947       lwrite = 0;
1948       for (l=0; l<nzB; l++) {
1949         if ((ctmp = bmap[cworkB[l]]) < cstart){
1950           vals[lwrite]   = aworkB[l];
1951           cols[lwrite++] = ctmp;
1952         }
1953       }
1954       for (l=0; l<nzA; l++){
1955         vals[lwrite]   = aworkA[l];
1956         cols[lwrite++] = cstart + cworkA[l];
1957       }
1958       for (l=0; l<nzB; l++) {
1959         if ((ctmp = bmap[cworkB[l]]) >= cend){
1960           vals[lwrite]   = aworkB[l];
1961           cols[lwrite++] = ctmp;
1962         }
1963       }
1964       vals += ncols;
1965       cols += ncols;
1966       rptr[i+1] = rptr[i] + ncols;
1967       if (rownz_max < ncols) rownz_max = ncols;
1968     }
1969     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1970   } else { /* only copy matrix values into sbuf_a */
1971     rptr = sbuf_j;
1972     vals = sbuf_a;
1973     rptr[0] = 0;
1974     for (i=0; i<rend-rstart; i++){
1975       row = i + rstart;
1976       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1977       ncols  = nzA + nzB;
1978       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1979       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1980       lwrite = 0;
1981       for (l=0; l<nzB; l++) {
1982         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1983       }
1984       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1985       for (l=0; l<nzB; l++) {
1986         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1987       }
1988       vals += ncols;
1989       rptr[i+1] = rptr[i] + ncols;
1990     }
1991   } /* end of if/else (reuse == MAT_INITIAL_MATRIX) */
1992 
1993   /* send nzlocal to others, and recv other's nzlocal */
1994   /*--------------------------------------------------*/
1995   if (reuse == MAT_INITIAL_MATRIX){
1996     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
1997     s_waits2 = s_waits3 + nsends;
1998     s_waits1 = s_waits2 + nsends;
1999     r_waits1 = s_waits1 + nsends;
2000     r_waits2 = r_waits1 + nrecvs;
2001     r_waits3 = r_waits2 + nrecvs;
2002   } else {
2003     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2004     r_waits3 = s_waits3 + nsends;
2005   }
2006 
2007   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2008   if (reuse == MAT_INITIAL_MATRIX){
2009     /* get new tags to keep the communication clean */
2010     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2011     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2012     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2013     rbuf_nz = sbuf_nz + nsends;
2014 
2015     /* post receives of other's nzlocal */
2016     for (i=0; i<nrecvs; i++){
2017       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2018     }
2019     /* send nzlocal to others */
2020     for (i=0; i<nsends; i++){
2021       sbuf_nz[i] = nzlocal;
2022       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2023     }
2024     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2025     count = nrecvs;
2026     while (count) {
2027       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2028       recv_rank[imdex] = recv_status.MPI_SOURCE;
2029       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2030       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2031 
2032       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2033       rbuf_nz[imdex] += i + 2;
2034       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2035       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2036       count--;
2037     }
2038     /* wait on sends of nzlocal */
2039     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2040     /* send mat->i,j to others, and recv from other's */
2041     /*------------------------------------------------*/
2042     for (i=0; i<nsends; i++){
2043       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2044       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2045     }
2046     /* wait on receives of mat->i,j */
2047     /*------------------------------*/
2048     count = nrecvs;
2049     while (count) {
2050       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2051       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2052       count--;
2053     }
2054     /* wait on sends of mat->i,j */
2055     /*---------------------------*/
2056     if (nsends) {
2057       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2058     }
2059   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2060 
2061   /* post receives, send and receive mat->a */
2062   /*----------------------------------------*/
2063   for (imdex=0; imdex<nrecvs; imdex++) {
2064     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2065   }
2066   for (i=0; i<nsends; i++){
2067     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2068   }
2069   count = nrecvs;
2070   while (count) {
2071     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2072     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2073     count--;
2074   }
2075   if (nsends) {
2076     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2077   }
2078 
2079   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2080 
2081   /* create redundant matrix */
2082   /*-------------------------*/
2083   if (reuse == MAT_INITIAL_MATRIX){
2084     /* compute rownz_max for preallocation */
2085     for (imdex=0; imdex<nrecvs; imdex++){
2086       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2087       rptr = rbuf_j[imdex];
2088       for (i=0; i<j; i++){
2089         ncols = rptr[i+1] - rptr[i];
2090         if (rownz_max < ncols) rownz_max = ncols;
2091       }
2092     }
2093 
2094     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2095     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2096     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2097     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2098     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2099   } else {
2100     C = *matredundant;
2101   }
2102 
2103   /* insert local matrix entries */
2104   rptr = sbuf_j;
2105   cols = sbuf_j + rend-rstart + 1;
2106   vals = sbuf_a;
2107   for (i=0; i<rend-rstart; i++){
2108     row   = i + rstart;
2109     ncols = rptr[i+1] - rptr[i];
2110     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2111     vals += ncols;
2112     cols += ncols;
2113   }
2114   /* insert received matrix entries */
2115   for (imdex=0; imdex<nrecvs; imdex++){
2116     rstart = rowrange[recv_rank[imdex]];
2117     rend   = rowrange[recv_rank[imdex]+1];
2118     rptr = rbuf_j[imdex];
2119     cols = rbuf_j[imdex] + rend-rstart + 1;
2120     vals = rbuf_a[imdex];
2121     for (i=0; i<rend-rstart; i++){
2122       row   = i + rstart;
2123       ncols = rptr[i+1] - rptr[i];
2124       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2125       vals += ncols;
2126       cols += ncols;
2127     }
2128   }
2129   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2130   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2131   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2132   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %D != input mat size %D",M,mat->rmap.N);
2133   if (reuse == MAT_INITIAL_MATRIX){
2134     PetscContainer container;
2135     *matredundant = C;
2136     /* create a supporting struct and attach it to C for reuse */
2137     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2138     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2139     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2140     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2141     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2142 
2143     redund->nzlocal = nzlocal;
2144     redund->nsends  = nsends;
2145     redund->nrecvs  = nrecvs;
2146     redund->send_rank = send_rank;
2147     redund->sbuf_nz = sbuf_nz;
2148     redund->sbuf_j  = sbuf_j;
2149     redund->sbuf_a  = sbuf_a;
2150     redund->rbuf_j  = rbuf_j;
2151     redund->rbuf_a  = rbuf_a;
2152 
2153     redund->MatDestroy = C->ops->destroy;
2154     C->ops->destroy    = MatDestroy_MatRedundant;
2155   }
2156   PetscFunctionReturn(0);
2157 }
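
/*
   Illustrative sketch (not part of the original source): gathering a complete
   copy of the parallel matrix onto each of nsubcomm sub-communicators, then
   refreshing it after the numerical values (but not the pattern) change. This
   assumes the public MatGetRedundantMatrix() wrapper mirrors the signature of
   the implementation above; the helper and variable names are hypothetical.
*/
static PetscErrorCode ExampleRedundant_MPIAIJ(Mat A,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub)
{
  PetscErrorCode ierr;
  Mat            Ared;

  PetscFunctionBegin;
  ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
  /* ... update the entries of A, keeping the nonzero pattern fixed ... */
  ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Ared);CHKERRQ(ierr);
  ierr = MatDestroy(Ared);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}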
2158 
2159 /* -------------------------------------------------------------------*/
2160 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2161        MatGetRow_MPIAIJ,
2162        MatRestoreRow_MPIAIJ,
2163        MatMult_MPIAIJ,
2164 /* 4*/ MatMultAdd_MPIAIJ,
2165        MatMultTranspose_MPIAIJ,
2166        MatMultTransposeAdd_MPIAIJ,
2167 #ifdef PETSC_HAVE_PBGL
2168        MatSolve_MPIAIJ,
2169 #else
2170        0,
2171 #endif
2172        0,
2173        0,
2174 /*10*/ 0,
2175        0,
2176        0,
2177        MatRelax_MPIAIJ,
2178        MatTranspose_MPIAIJ,
2179 /*15*/ MatGetInfo_MPIAIJ,
2180        MatEqual_MPIAIJ,
2181        MatGetDiagonal_MPIAIJ,
2182        MatDiagonalScale_MPIAIJ,
2183        MatNorm_MPIAIJ,
2184 /*20*/ MatAssemblyBegin_MPIAIJ,
2185        MatAssemblyEnd_MPIAIJ,
2186        0,
2187        MatSetOption_MPIAIJ,
2188        MatZeroEntries_MPIAIJ,
2189 /*25*/ MatZeroRows_MPIAIJ,
2190        0,
2191 #ifdef PETSC_HAVE_PBGL
2192        MatLUFactorNumeric_MPIAIJ,
2193 #else
2194        0,
2195 #endif
2196        0,
2197        0,
2198 /*30*/ MatSetUpPreallocation_MPIAIJ,
2199 #ifdef PETSC_HAVE_PBGL
2200        MatILUFactorSymbolic_MPIAIJ,
2201 #else
2202        0,
2203 #endif
2204        0,
2205        0,
2206        0,
2207 /*35*/ MatDuplicate_MPIAIJ,
2208        0,
2209        0,
2210        0,
2211        0,
2212 /*40*/ MatAXPY_MPIAIJ,
2213        MatGetSubMatrices_MPIAIJ,
2214        MatIncreaseOverlap_MPIAIJ,
2215        MatGetValues_MPIAIJ,
2216        MatCopy_MPIAIJ,
2217 /*45*/ 0,
2218        MatScale_MPIAIJ,
2219        0,
2220        0,
2221        0,
2222 /*50*/ MatSetBlockSize_MPIAIJ,
2223        0,
2224        0,
2225        0,
2226        0,
2227 /*55*/ MatFDColoringCreate_MPIAIJ,
2228        0,
2229        MatSetUnfactored_MPIAIJ,
2230        MatPermute_MPIAIJ,
2231        0,
2232 /*60*/ MatGetSubMatrix_MPIAIJ,
2233        MatDestroy_MPIAIJ,
2234        MatView_MPIAIJ,
2235        0,
2236        0,
2237 /*65*/ 0,
2238        0,
2239        0,
2240        0,
2241        0,
2242 /*70*/ 0,
2243        0,
2244        MatSetColoring_MPIAIJ,
2245 #if defined(PETSC_HAVE_ADIC)
2246        MatSetValuesAdic_MPIAIJ,
2247 #else
2248        0,
2249 #endif
2250        MatSetValuesAdifor_MPIAIJ,
2251 /*75*/ 0,
2252        0,
2253        0,
2254        0,
2255        0,
2256 /*80*/ 0,
2257        0,
2258        0,
2259        0,
2260 /*84*/ MatLoad_MPIAIJ,
2261        0,
2262        0,
2263        0,
2264        0,
2265        0,
2266 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2267        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2268        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2269        MatPtAP_Basic,
2270        MatPtAPSymbolic_MPIAIJ,
2271 /*95*/ MatPtAPNumeric_MPIAIJ,
2272        0,
2273        0,
2274        0,
2275        0,
2276 /*100*/0,
2277        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2278        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2279        MatConjugate_MPIAIJ,
2280        0,
2281 /*105*/MatSetValuesRow_MPIAIJ,
2282        MatRealPart_MPIAIJ,
2283        MatImaginaryPart_MPIAIJ,
2284        0,
2285        0,
2286 /*110*/0,
2287        MatGetRedundantMatrix_MPIAIJ};
2288 
2289 /* ----------------------------------------------------------------------------------------*/
2290 
2291 EXTERN_C_BEGIN
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2294 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2295 {
2296   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2297   PetscErrorCode ierr;
2298 
2299   PetscFunctionBegin;
2300   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2301   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 EXTERN_C_END
2305 
2306 EXTERN_C_BEGIN
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2309 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2310 {
2311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2316   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
2319 EXTERN_C_END
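
/*
   Illustrative sketch (not part of the original source): the intended
   MatStoreValues()/MatRetrieveValues() cycle. The matrix must be assembled
   and its pattern frozen first, here assuming this version's two-argument
   MatSetOption(); the helper name is hypothetical.
*/
static PetscErrorCode ExampleStoreRetrieve_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
  ierr = MatStoreValues(A);CHKERRQ(ierr);    /* save a copy of the numerical values */
  /* ... modify the values and reassemble; the nonzero pattern may not change ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr); /* restore the saved values */
  PetscFunctionReturn(0);
}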
2320 
2321 #include "petscpc.h"
2322 EXTERN_C_BEGIN
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2325 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2326 {
2327   Mat_MPIAIJ     *b;
2328   PetscErrorCode ierr;
2329   PetscInt       i;
2330 
2331   PetscFunctionBegin;
2332   B->preallocated = PETSC_TRUE;
2333   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2334   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2335   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2336   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2337 
2338   B->rmap.bs = B->cmap.bs = 1;
2339   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2340   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2341   if (d_nnz) {
2342     for (i=0; i<B->rmap.n; i++) {
2343       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2344     }
2345   }
2346   if (o_nnz) {
2347     for (i=0; i<B->rmap.n; i++) {
2348       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2349     }
2350   }
2351   b = (Mat_MPIAIJ*)B->data;
2352 
2353   /* Explicitly create 2 MATSEQAIJ matrices. */
2354   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2355   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2356   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2357   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2358   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2359   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2360   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2361   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2362 
2363   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2364   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2365 
2366   PetscFunctionReturn(0);
2367 }
2368 EXTERN_C_END
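
/*
   Illustrative sketch (not part of the original source): creating an MPIAIJ
   matrix with per-row preallocation, as processed by the routine above.
   d_nnz[i] counts nonzeros of local row i whose columns are owned by this
   process (the b->A block); o_nnz[i] counts the rest (the b->B block). The
   helper name and the stencil-like counts are hypothetical.
*/
static PetscErrorCode ExamplePreallocate_MPIAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal)
{
  PetscErrorCode ierr;
  Mat            A;
  PetscInt       i,*d_nnz,*o_nnz;

  PetscFunctionBegin;
  ierr  = PetscMalloc((2*mlocal+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + mlocal;
  for (i=0; i<mlocal; i++) {d_nnz[i] = 3; o_nnz[i] = 2;} /* e.g. a 5-point stencil estimate */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,mlocal,nlocal,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}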
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2372 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2373 {
2374   Mat            mat;
2375   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   *newmat       = 0;
2380   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2381   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2382   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2383   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2384   a    = (Mat_MPIAIJ*)mat->data;
2385 
2386   mat->factor       = matin->factor;
2387   mat->rmap.bs      = matin->rmap.bs;
2388   mat->assembled    = PETSC_TRUE;
2389   mat->insertmode   = NOT_SET_VALUES;
2390   mat->preallocated = PETSC_TRUE;
2391 
2392   a->size           = oldmat->size;
2393   a->rank           = oldmat->rank;
2394   a->donotstash     = oldmat->donotstash;
2395   a->roworiented    = oldmat->roworiented;
2396   a->rowindices     = 0;
2397   a->rowvalues      = 0;
2398   a->getrowactive   = PETSC_FALSE;
2399 
2400   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2401   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2402 
2403   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2404   if (oldmat->colmap) {
2405 #if defined (PETSC_USE_CTABLE)
2406     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2407 #else
2408     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2409     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2410     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2411 #endif
2412   } else a->colmap = 0;
2413   if (oldmat->garray) {
2414     PetscInt len;
2415     len  = oldmat->B->cmap.n;
2416     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2417     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2418     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2419   } else a->garray = 0;
2420 
2421   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2422   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2423   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2424   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2425   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2426   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2427   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2428   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2429   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2430   *newmat = mat;
2431   PetscFunctionReturn(0);
2432 }
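
/*
   Illustrative sketch (not part of the original source): the two duplicate
   modes handled by MatDuplicate_MPIAIJ() above. The helper name is
   hypothetical.
*/
static PetscErrorCode ExampleDuplicate_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;
  Mat            B,C;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);        /* copy structure and values */
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&C);CHKERRQ(ierr); /* copy structure only */
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = MatDestroy(C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}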
2433 
2434 #include "petscsys.h"
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatLoad_MPIAIJ"
2438 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2439 {
2440   Mat            A;
2441   PetscScalar    *vals,*svals;
2442   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2443   MPI_Status     status;
2444   PetscErrorCode ierr;
2445   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2446   PetscInt       i,nz,j,rstart,rend,mmax;
2447   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2448   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2449   PetscInt       cend,cstart,n,*rowners;
2450   int            fd;
2451 
2452   PetscFunctionBegin;
2453   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2454   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2455   if (!rank) {
2456     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2457     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2458     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2459   }
2460 
2461   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2462   M = header[1]; N = header[2];
2463   /* determine ownership of all rows */
2464   m    = M/size + ((M % size) > rank);
2465   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2466   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2467 
2468   /* First process needs enough room for process with most rows */
2469   if (!rank) {
2470     mmax       = rowners[1];
2471     for (i=2; i<=size; i++) {
2472       mmax = PetscMax(mmax,rowners[i]);
2473     }
2474   } else mmax = m;
2475 
2476   rowners[0] = 0;
2477   for (i=2; i<=size; i++) {
2478     rowners[i] += rowners[i-1];
2479   }
2480   rstart = rowners[rank];
2481   rend   = rowners[rank+1];
2482 
2483   /* distribute row lengths to all processors */
2484   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2485   if (!rank) {
2486     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2487     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2488     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2489     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2490     for (j=0; j<m; j++) {
2491       procsnz[0] += ourlens[j];
2492     }
2493     for (i=1; i<size; i++) {
2494       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2495       /* calculate the number of nonzeros on each processor */
2496       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2497         procsnz[i] += rowlengths[j];
2498       }
2499       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2500     }
2501     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2502   } else {
2503     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2504   }
2505 
2506   if (!rank) {
2507     /* determine max buffer needed and allocate it */
2508     maxnz = 0;
2509     for (i=0; i<size; i++) {
2510       maxnz = PetscMax(maxnz,procsnz[i]);
2511     }
2512     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2513 
2514     /* read in my part of the matrix column indices  */
2515     nz   = procsnz[0];
2516     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2517     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2518 
2519     /* read in everyone else's part and ship it off */
2520     for (i=1; i<size; i++) {
2521       nz   = procsnz[i];
2522       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2523       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2524     }
2525     ierr = PetscFree(cols);CHKERRQ(ierr);
2526   } else {
2527     /* determine buffer space needed for message */
2528     nz = 0;
2529     for (i=0; i<m; i++) {
2530       nz += ourlens[i];
2531     }
2532     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2533 
2534     /* receive message of column indices*/
2535     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2536     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2537     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2538   }
2539 
2540   /* determine column ownership if matrix is not square */
2541   if (N != M) {
2542     n      = N/size + ((N % size) > rank);
2543     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2544     cstart = cend - n;
2545   } else {
2546     cstart = rstart;
2547     cend   = rend;
2548     n      = cend - cstart;
2549   }
2550 
2551   /* loop over local rows, determining number of off diagonal entries */
2552   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2553   jj = 0;
2554   for (i=0; i<m; i++) {
2555     for (j=0; j<ourlens[i]; j++) {
2556       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2557       jj++;
2558     }
2559   }
2560 
2561   /* create our matrix */
2562   for (i=0; i<m; i++) {
2563     ourlens[i] -= offlens[i];
2564   }
2565   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2566   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2567   ierr = MatSetType(A,type);CHKERRQ(ierr);
2568   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2569 
2570   for (i=0; i<m; i++) {
2571     ourlens[i] += offlens[i];
2572   }
2573 
2574   if (!rank) {
2575     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2576 
2577     /* read in my part of the matrix numerical values  */
2578     nz   = procsnz[0];
2579     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2580 
2581     /* insert into matrix */
2582     jj      = rstart;
2583     smycols = mycols;
2584     svals   = vals;
2585     for (i=0; i<m; i++) {
2586       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2587       smycols += ourlens[i];
2588       svals   += ourlens[i];
2589       jj++;
2590     }
2591 
2592     /* read in other processors and ship out */
2593     for (i=1; i<size; i++) {
2594       nz   = procsnz[i];
2595       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2596       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2597     }
2598     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2599   } else {
2600     /* receive numeric values */
2601     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2602 
2603     /* receive message of values*/
2604     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2605     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2606     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2607 
2608     /* insert into matrix */
2609     jj      = rstart;
2610     smycols = mycols;
2611     svals   = vals;
2612     for (i=0; i<m; i++) {
2613       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2614       smycols += ourlens[i];
2615       svals   += ourlens[i];
2616       jj++;
2617     }
2618   }
2619   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2620   ierr = PetscFree(vals);CHKERRQ(ierr);
2621   ierr = PetscFree(mycols);CHKERRQ(ierr);
2622   ierr = PetscFree(rowners);CHKERRQ(ierr);
2623 
2624   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2625   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2626   *newmat = A;
2627   PetscFunctionReturn(0);
2628 }
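
/*
   Illustrative sketch (not part of the original source): driving the loader
   above through a binary viewer. It assumes a PETSc binary file previously
   written with MatView(); the helper name and file name are hypothetical.
*/
static PetscErrorCode ExampleLoad_MPIAIJ(MPI_Comm comm,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIAIJ,newmat);CHKERRQ(ierr); /* dispatches to MatLoad_MPIAIJ() */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}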
2629 
2630 #undef __FUNCT__
2631 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2632 /*
2633     Not great since it makes two copies of the submatrix: first a SeqAIJ
2634   locally, and then the end result by concatenating the local matrices.
2635   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2636 */
2637 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2638 {
2639   PetscErrorCode ierr;
2640   PetscMPIInt    rank,size;
2641   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2642   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2643   Mat            *local,M,Mreuse;
2644   PetscScalar    *vwork,*aa;
2645   MPI_Comm       comm = mat->comm;
2646   Mat_SeqAIJ     *aij;
2647 
2648 
2649   PetscFunctionBegin;
2650   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2651   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2652 
2653   if (call ==  MAT_REUSE_MATRIX) {
2654     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2655     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2656     local = &Mreuse;
2657     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2658   } else {
2659     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2660     Mreuse = *local;
2661     ierr   = PetscFree(local);CHKERRQ(ierr);
2662   }
2663 
2664   /*
2665       m - number of local rows
2666       n - number of columns (same on all processors)
2667       rstart - first row in new global matrix generated
2668   */
2669   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2670   if (call == MAT_INITIAL_MATRIX) {
2671     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2672     ii  = aij->i;
2673     jj  = aij->j;
2674 
2675     /*
2676         Determine the number of non-zeros in the diagonal and off-diagonal
2677         portions of the matrix in order to do correct preallocation
2678     */
2679 
2680     /* first get start and end of "diagonal" columns */
2681     if (csize == PETSC_DECIDE) {
2682       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2683       if (mglobal == n) { /* square matrix */
2684         nlocal = m;
2685       } else {
2686         nlocal = n/size + ((n % size) > rank);
2687       }
2688     } else {
2689       nlocal = csize;
2690     }
2691     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2692     rstart = rend - nlocal;
2693     if (rank == size - 1 && rend != n) {
2694       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2695     }
2696 
2697     /* next, compute all the lengths */
2698     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2699     olens = dlens + m;
2700     for (i=0; i<m; i++) {
2701       jend = ii[i+1] - ii[i];
2702       olen = 0;
2703       dlen = 0;
2704       for (j=0; j<jend; j++) {
2705         if (*jj < rstart || *jj >= rend) olen++;
2706         else dlen++;
2707         jj++;
2708       }
2709       olens[i] = olen;
2710       dlens[i] = dlen;
2711     }
2712     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2713     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2714     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2715     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2716     ierr = PetscFree(dlens);CHKERRQ(ierr);
2717   } else {
2718     PetscInt ml,nl;
2719 
2720     M = *newmat;
2721     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2722     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2723     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2724     /*
2725          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2726        rather than the slower MatSetValues().
2727     */
2728     M->was_assembled = PETSC_TRUE;
2729     M->assembled     = PETSC_FALSE;
2730   }
2731   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2732   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2733   ii  = aij->i;
2734   jj  = aij->j;
2735   aa  = aij->a;
2736   for (i=0; i<m; i++) {
2737     row   = rstart + i;
2738     nz    = ii[i+1] - ii[i];
2739     cwork = jj;     jj += nz;
2740     vwork = aa;     aa += nz;
2741     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2742   }
2743 
2744   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2745   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2746   *newmat = M;
2747 
2748   /* save submatrix used in processor for next request */
2749   if (call ==  MAT_INITIAL_MATRIX) {
2750     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2751     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2752   }
2753 
2754   PetscFunctionReturn(0);
2755 }
2756 
2757 EXTERN_C_BEGIN
2758 #undef __FUNCT__
2759 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2760 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2761 {
2762   PetscInt       m,cstart, cend,j,nnz,i,d;
2763   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2764   const PetscInt *JJ;
2765   PetscScalar    *values;
2766   PetscErrorCode ierr;
2767 
2768   PetscFunctionBegin;
2769   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2770 
2771   B->rmap.bs = B->cmap.bs = 1;
2772   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2773   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2774   m      = B->rmap.n;
2775   cstart = B->cmap.rstart;
2776   cend   = B->cmap.rend;
2777   rstart = B->rmap.rstart;
2778 
2779   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2780   o_nnz = d_nnz + m;
2781 
2782   for (i=0; i<m; i++) {
2783     nnz     = Ii[i+1]- Ii[i];
2784     JJ      = J + Ii[i];
2785     nnz_max = PetscMax(nnz_max,nnz);
2786     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2787     for (j=0; j<nnz; j++) {
2788       if (*JJ >= cstart) break;
2789       JJ++;
2790     }
2791     d = 0;
2792     for (; j<nnz; j++) {
2793       if (*JJ++ >= cend) break;
2794       d++;
2795     }
2796     d_nnz[i] = d;
2797     o_nnz[i] = nnz - d;
2798   }
2799   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2800   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2801 
2802   if (v) values = (PetscScalar*)v;
2803   else {
2804     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2805     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2806   }
2807 
2808   for (i=0; i<m; i++) {
2809     ii   = i + rstart;
2810     nnz  = Ii[i+1]- Ii[i];
2811     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2812   }
2813   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2814   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2815 
2816   if (!v) {
2817     ierr = PetscFree(values);CHKERRQ(ierr);
2818   }
2819   PetscFunctionReturn(0);
2820 }
2821 EXTERN_C_END
2822 
2823 #undef __FUNCT__
2824 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2825 /*@
2826    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2827    (the default parallel PETSc format).
2828 
2829    Collective on MPI_Comm
2830 
2831    Input Parameters:
2832 +  B - the matrix
2833 .  i - the indices into j for the start of each local row (starts with zero)
2834 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2835 -  v - optional values in the matrix
2836 
2837    Level: developer
2838 
2839    Notes: this routine copies the values from i[], j[], and v[] into PETSc's internal
2840      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2841 
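   Example (a minimal sketch, assuming one process that owns both rows of a
   2x3 matrix; the local and global sizes must already have been set with
   MatSetSizes() before this call):

.vb
   Mat         B;
   PetscInt    i[] = {0,2,4};            /* i[r] is the offset of row r in j[] and v[] */
   PetscInt    j[] = {0,1, 1,2};         /* column indices, sorted within each row */
   PetscScalar v[] = {1.0,2.0, 3.0,4.0};

   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,2,3,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIAIJ);
   MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
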
2842 .keywords: matrix, aij, compressed row, sparse, parallel
2843 
2844 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2845           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2846 @*/
2847 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2848 {
2849   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2850 
2851   PetscFunctionBegin;
2852   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2853   if (f) {
2854     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2855   }
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 #undef __FUNCT__
2860 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2861 /*@C
2862    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2863    (the default parallel PETSc format).  For good matrix assembly performance
2864    the user should preallocate the matrix storage by setting the parameters
2865    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2866    performance can be increased by more than a factor of 50.
2867 
2868    Collective on MPI_Comm
2869 
2870    Input Parameters:
2871 +  A - the matrix
2872 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2873            (same value is used for all local rows)
2874 .  d_nnz - array containing the number of nonzeros in the various rows of the
2875            DIAGONAL portion of the local submatrix (possibly different for each row)
2876            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2877            The size of this array is equal to the number of local rows, i.e., 'm'.
2878            You must leave room for the diagonal entry even if it is zero.
2879 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2880            submatrix (same value is used for all local rows).
2881 -  o_nnz - array containing the number of nonzeros in the various rows of the
2882            OFF-DIAGONAL portion of the local submatrix (possibly different for
2883            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2884            structure. The size of this array is equal to the number
2885            of local rows, i.e., 'm'.
2886 
2887    If the *_nnz parameter is given then the *_nz parameter is ignored
2888 
2889    The AIJ format (also called the Yale sparse matrix format or
2890    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2891    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2892 
2893    The parallel matrix is partitioned such that the first m0 rows belong to
2894    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2895    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2896 
2897    The DIAGONAL portion of the local submatrix of a processor can be defined
2898    as the submatrix which is obtained by extracting the part corresponding
2899    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2900    first row that belongs to the processor, and r2 is the last row belonging
2901    to this processor. This is a square mxm matrix. The remaining portion
2902    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2903 
2904    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2905 
2906    Example usage:
2907 
2908    Consider the following 8x8 matrix with 34 non-zero values, that is
2909    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2910    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2911    as follows:
2912 
2913 .vb
2914             1  2  0  |  0  3  0  |  0  4
2915     Proc0   0  5  6  |  7  0  0  |  8  0
2916             9  0 10  | 11  0  0  | 12  0
2917     -------------------------------------
2918            13  0 14  | 15 16 17  |  0  0
2919     Proc1   0 18  0  | 19 20 21  |  0  0
2920             0  0  0  | 22 23  0  | 24  0
2921     -------------------------------------
2922     Proc2  25 26 27  |  0  0 28  | 29  0
2923            30  0  0  | 31 32 33  |  0 34
2924 .ve
2925 
2926    This can be represented as a collection of submatrices as:
2927 
2928 .vb
2929       A B C
2930       D E F
2931       G H I
2932 .ve
2933 
2934    Where the submatrices A,B,C are owned by proc0, D,E,F are
2935    owned by proc1, G,H,I are owned by proc2.
2936 
2937    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2938    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2939    The 'M','N' parameters are 8,8, and have the same values on all procs.
2940 
2941    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2942    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2943    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2944    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2945    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2946    matrix, and [DF] as another SeqAIJ matrix.
2947 
2948    When d_nz, o_nz parameters are specified, d_nz storage elements are
2949    allocated for every row of the local diagonal submatrix, and o_nz
2950    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2951    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2952    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2953    In this case, the values of d_nz,o_nz are:
2954 .vb
2955      proc0 : d_nz = 2, o_nz = 2
2956      proc1 : d_nz = 3, o_nz = 2
2957      proc2 : d_nz = 1, o_nz = 4
2958 .ve
2959    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2960    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2961    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2962    34 values.
2963 
2964    When d_nnz, o_nnz parameters are specified, the storage is specified
2965    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2966    In the above case the values for d_nnz,o_nnz are:
2967 .vb
2968      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2969      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2970      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2971 .ve
2972    Here the space allocated is the sum of all the above values, i.e., 34, and
2973    hence pre-allocation is perfect.
2974 
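   A typical call sequence (a sketch; error checking with CHKERRQ() is omitted,
   and m, n, d_nnz, o_nnz are assumed to have been computed by the caller):

.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
   /* ... MatSetValues() ... MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
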
2975    Level: intermediate
2976 
2977 .keywords: matrix, aij, compressed row, sparse, parallel
2978 
2979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2980           MPIAIJ
2981 @*/
2982 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2983 {
2984   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2985 
2986   PetscFunctionBegin;
2987   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2988   if (f) {
2989     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2990   }
2991   PetscFunctionReturn(0);
2992 }
2993 
2994 #undef __FUNCT__
2995 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
2996 /*@C
2997      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
2998          CSR format the local rows.
2999 
3000    Collective on MPI_Comm
3001 
3002    Input Parameters:
3003 +  comm - MPI communicator
3004 .  m - number of local rows (Cannot be PETSC_DECIDE)
3005 .  n - This value should be the same as the local size used in creating the
3006        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3007        calculated if N is given). For square matrices n is almost always m.
3008 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3009 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3010 .   i - row indices
3011 .   j - column indices
3012 -   a - matrix values
3013 
3014    Output Parameter:
3015 .   mat - the matrix
3016 
3017    Level: intermediate
3018 
3019    Notes:
3020        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3021      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3022      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3023 
3024        The i and j indices are 0 based
3025 
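   Example (a minimal sketch for a run on one process that owns both rows of a
   2x2 matrix):

.vb
   PetscInt    i[] = {0,2,3};
   PetscInt    j[] = {0,1, 1};
   PetscScalar a[] = {2.0,-1.0, 2.0};
   Mat         A;

   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve
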
3026 .keywords: matrix, aij, compressed row, sparse, parallel
3027 
3028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3029           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3030 @*/
3031 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3032 {
3033   PetscErrorCode ierr;
3034 
3035   PetscFunctionBegin;
3036   if (i[0]) {
3037     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3038   }
3039   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3040   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
       ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); /* sizes must be set before the CSR preallocation can lay out the row/column maps */
3041   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3042   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3043   PetscFunctionReturn(0);
3044 }
3045 
3046 #undef __FUNCT__
3047 #define __FUNCT__ "MatCreateMPIAIJ"
3048 /*@C
3049    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3050    (the default parallel PETSc format).  For good matrix assembly performance
3051    the user should preallocate the matrix storage by setting the parameters
3052    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3053    performance can be increased by more than a factor of 50.
3054 
3055    Collective on MPI_Comm
3056 
3057    Input Parameters:
3058 +  comm - MPI communicator
3059 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3060            This value should be the same as the local size used in creating the
3061            y vector for the matrix-vector product y = Ax.
3062 .  n - This value should be the same as the local size used in creating the
3063        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3064        calculated if N is given). For square matrices n is almost always m.
3065 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3066 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3067 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3068            (same value is used for all local rows)
3069 .  d_nnz - array containing the number of nonzeros in the various rows of the
3070            DIAGONAL portion of the local submatrix (possibly different for each row)
3071            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3072            The size of this array is equal to the number of local rows, i.e., 'm'.
3073            You must leave room for the diagonal entry even if it is zero.
3074 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3075            submatrix (same value is used for all local rows).
3076 -  o_nnz - array containing the number of nonzeros in the various rows of the
3077            OFF-DIAGONAL portion of the local submatrix (possibly different for
3078            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3079            structure. The size of this array is equal to the number
3080            of local rows, i.e., 'm'.
3081 
3082    Output Parameter:
3083 .  A - the matrix
3084 
3085    Notes:
3086    If the *_nnz parameter is given then the *_nz parameter is ignored
3087 
3088    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3089    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3090    storage requirements for this matrix.
3091 
3092    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3093    processor then it must be used on all processors that share the object for
3094    that argument.
3095 
3096    The user MUST specify either the local or global matrix dimensions
3097    (possibly both).
3098 
3099    The parallel matrix is partitioned across processors such that the
3100    first m0 rows belong to process 0, the next m1 rows belong to
3101    process 1, the next m2 rows belong to process 2 etc.. where
3102    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3103    values corresponding to an [m x N] submatrix.
3104 
3105    The columns are logically partitioned with the n0 columns belonging
3106    to 0th partition, the next n1 columns belonging to the next
3107    partition etc., where n0,n1,n2... are the input parameter 'n'.
3108 
3109    The DIAGONAL portion of the local submatrix on any given processor
3110    is the submatrix corresponding to the rows and columns m,n
3111    corresponding to the given processor, i.e., the diagonal matrix on
3112    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
3113    etc. The remaining portion of the local submatrix [m x (N-n)]
3114    constitute the OFF-DIAGONAL portion. The example below better
3115    illustrates this concept.
3116 
3117    For a square global matrix we define each processor's diagonal portion
3118    to be its local rows and the corresponding columns (a square submatrix);
3119    each processor's off-diagonal portion encompasses the remainder of the
3120    local matrix (a rectangular submatrix).
3121 
3122    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3123 
3124    When calling this routine with a single process communicator, a matrix of
3125    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3126    type of communicator, use the construction mechanism:
3127      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3128 
3129    By default, this format uses inodes (identical nodes) when possible.
3130    We search for consecutive rows with the same nonzero structure, thereby
3131    reusing matrix information to achieve increased efficiency.
3132 
3133    Options Database Keys:
3134 +  -mat_no_inode  - Do not use inodes
3135 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3136 -  -mat_aij_oneindex - Internally use indexing starting at 1
3137         rather than 0.  Note that when calling MatSetValues(),
3138         the user still MUST index entries starting at 0!
3139 
3140 
3141    Example usage:
3142 
3143    Consider the following 8x8 matrix with 34 non-zero values, that is
3144    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3145    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3146    as follows:
3147 
3148 .vb
3149             1  2  0  |  0  3  0  |  0  4
3150     Proc0   0  5  6  |  7  0  0  |  8  0
3151             9  0 10  | 11  0  0  | 12  0
3152     -------------------------------------
3153            13  0 14  | 15 16 17  |  0  0
3154     Proc1   0 18  0  | 19 20 21  |  0  0
3155             0  0  0  | 22 23  0  | 24  0
3156     -------------------------------------
3157     Proc2  25 26 27  |  0  0 28  | 29  0
3158            30  0  0  | 31 32 33  |  0 34
3159 .ve
3160 
3161    This can be represented as a collection of submatrices as:
3162 
3163 .vb
3164       A B C
3165       D E F
3166       G H I
3167 .ve
3168 
3169    Where the submatrices A,B,C are owned by proc0, D,E,F are
3170    owned by proc1, G,H,I are owned by proc2.
3171 
3172    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3173    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3174    The 'M','N' parameters are 8,8, and have the same values on all procs.
3175 
3176    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3177    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3178    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3179    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3180    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3181    matrix, and [DF] as another SeqAIJ matrix.
3182 
3183    When d_nz, o_nz parameters are specified, d_nz storage elements are
3184    allocated for every row of the local diagonal submatrix, and o_nz
3185    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3186    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3187    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3188    In this case, the values of d_nz,o_nz are:
3189 .vb
3190      proc0 : d_nz = 2, o_nz = 2
3191      proc1 : d_nz = 3, o_nz = 2
3192      proc2 : d_nz = 1, o_nz = 4
3193 .ve
3194    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3195    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3196    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3197    34 values.
3198 
3199    When d_nnz, o_nnz parameters are specified, the storage is specified
3200    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3201    In the above case the values for d_nnz,o_nnz are:
3202 .vb
3203      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3204      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3205      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3206 .ve
3207    Here the space allocated is the sum of all the above values, i.e., 34, and
3208    hence pre-allocation is perfect.
3209 
3210    Level: intermediate
3211 
3212 .keywords: matrix, aij, compressed row, sparse, parallel
3213 
3214 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3215           MPIAIJ, MatCreateMPIAIJWithArrays()
3216 @*/
3217 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3218 {
3219   PetscErrorCode ierr;
3220   PetscMPIInt    size;
3221 
3222   PetscFunctionBegin;
3223   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3224   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3225   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3226   if (size > 1) {
3227     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3228     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3229   } else {
3230     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3231     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3232   }
3233   PetscFunctionReturn(0);
3234 }
3235 
3236 #undef __FUNCT__
3237 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
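/*
   MatMPIAIJGetSeqAIJ - gives access to the diagonal block Ad and the off-diagonal
   block Ao of an MPIAIJ matrix (both stored as SeqAIJ matrices), plus the mapping
   colmap from Ao's local column numbers to global column numbers.
*/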
3238 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3239 {
3240   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3241 
3242   PetscFunctionBegin;
3243   *Ad     = a->A;
3244   *Ao     = a->B;
3245   *colmap = a->garray;
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3251 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3252 {
3253   PetscErrorCode ierr;
3254   PetscInt       i;
3255   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3256 
3257   PetscFunctionBegin;
3258   if (coloring->ctype == IS_COLORING_GLOBAL) {
3259     ISColoringValue *allcolors,*colors;
3260     ISColoring      ocoloring;
3261 
3262     /* set coloring for diagonal portion */
3263     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3264 
3265     /* set coloring for off-diagonal portion */
3266     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3267     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3268     for (i=0; i<a->B->cmap.n; i++) {
3269       colors[i] = allcolors[a->garray[i]];
3270     }
3271     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3272     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3273     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3274     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3275   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3276     ISColoringValue *colors;
3277     PetscInt        *larray;
3278     ISColoring      ocoloring;
3279 
3280     /* set coloring for diagonal portion */
3281     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3282     for (i=0; i<a->A->cmap.n; i++) {
3283       larray[i] = i + A->cmap.rstart;
3284     }
3285     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3286     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3287     for (i=0; i<a->A->cmap.n; i++) {
3288       colors[i] = coloring->colors[larray[i]];
3289     }
3290     ierr = PetscFree(larray);CHKERRQ(ierr);
3291     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3292     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3293     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3294 
3295     /* set coloring for off-diagonal portion */
3296     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3297     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3298     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3299     for (i=0; i<a->B->cmap.n; i++) {
3300       colors[i] = coloring->colors[larray[i]];
3301     }
3302     ierr = PetscFree(larray);CHKERRQ(ierr);
3303     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3304     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3305     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3306   } else {
3307     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3308   }
3309 
3310   PetscFunctionReturn(0);
3311 }
3312 
3313 #if defined(PETSC_HAVE_ADIC)
3314 #undef __FUNCT__
3315 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3316 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3317 {
3318   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3319   PetscErrorCode ierr;
3320 
3321   PetscFunctionBegin;
3322   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3323   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3324   PetscFunctionReturn(0);
3325 }
3326 #endif
3327 
3328 #undef __FUNCT__
3329 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3330 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3331 {
3332   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3333   PetscErrorCode ierr;
3334 
3335   PetscFunctionBegin;
3336   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3337   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3338   PetscFunctionReturn(0);
3339 }
3340 
3341 #undef __FUNCT__
3342 #define __FUNCT__ "MatMerge"
3343 /*@C
3344       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3345                  matrices from each processor
3346 
3347     Collective on MPI_Comm
3348 
3349    Input Parameters:
3350 +    comm - the communicator the parallel matrix will live on
3351 .    inmat - the input sequential matrix (one per process)
3352 .    n - number of local columns (or PETSC_DECIDE)
3353 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3354 
3355    Output Parameter:
3356 .    outmat - the parallel matrix generated
3357 
3358     Level: advanced
3359 
3360    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3361 
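   Example (a sketch; every process contributes its local rows as a SeqAIJ matrix):

.vb
   Mat seq,par;
   /* ... fill the m x N SeqAIJ matrix seq on each process ... */
   MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);
   /* seq is destroyed by MatMerge(); par is the assembled parallel matrix */
.ve
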
3362 @*/
3363 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3364 {
3365   PetscErrorCode ierr;
3366   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3367   PetscInt       *indx;
3368   PetscScalar    *values;
3369 
3370   PetscFunctionBegin;
3371   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3372   if (scall == MAT_INITIAL_MATRIX){
3373     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3374     if (n == PETSC_DECIDE){
3375       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3376     }
3377     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3378     rstart -= m;
3379 
3380     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3381     for (i=0;i<m;i++) {
3382       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3383       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3384       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3385     }
3386     /* This routine will ONLY return MPIAIJ type matrix */
3387     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3388     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3389     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3390     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3391     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3392 
3393   } else if (scall == MAT_REUSE_MATRIX){
3394     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3395   } else {
3396     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3397   }
3398 
3399   for (i=0;i<m;i++) {
3400     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3401     Ii    = i + rstart;
3402     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3403     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3404   }
3405   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3406   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3407   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3408 
3409   PetscFunctionReturn(0);
3410 }
3411 
3412 #undef __FUNCT__
3413 #define __FUNCT__ "MatFileSplit"
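/*
   MatFileSplit - writes each process's local rows of A, as a sequential matrix,
   to its own binary file named <outfile>.<rank>.
*/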
3414 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3415 {
3416   PetscErrorCode    ierr;
3417   PetscMPIInt       rank;
3418   PetscInt          m,N,i,rstart,nnz;
3419   size_t            len;
3420   const PetscInt    *indx;
3421   PetscViewer       out;
3422   char              *name;
3423   Mat               B;
3424   const PetscScalar *values;
3425 
3426   PetscFunctionBegin;
3427   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3428   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3429   /* Should this be the type of the diagonal block of A? */
3430   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3431   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3432   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3433   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3434   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3435   for (i=0;i<m;i++) {
3436     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3437     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3438     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3439   }
3440   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3441   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3442 
3443   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3444   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3445   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for "." plus any 32-bit rank plus the terminating NULL */
3446   sprintf(name,"%s.%d",outfile,rank);
3447   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3448   ierr = PetscFree(name);CHKERRQ(ierr);
3449   ierr = MatView(B,out);CHKERRQ(ierr);
3450   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3451   ierr = MatDestroy(B);CHKERRQ(ierr);
3452   PetscFunctionReturn(0);
3453 }
3454 
3455 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3456 #undef __FUNCT__
3457 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3458 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3459 {
3460   PetscErrorCode       ierr;
3461   Mat_Merge_SeqsToMPI  *merge;
3462   PetscContainer       container;
3463 
3464   PetscFunctionBegin;
3465   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3466   if (container) {
3467     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3468     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3469     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3470     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3471     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3472     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3473     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3474     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3475     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3476     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3477     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3478     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3479 
3480     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3481     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3482     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container existed */
3483   }
3484 
3485   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3486   PetscFunctionReturn(0);
3487 }
3488 
3489 #include "src/mat/utils/freespace.h"
3490 #include "petscbt.h"
3491 static PetscEvent logkey_seqstompinum = 0;
3492 #undef __FUNCT__
3493 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3494 /*@C
3495       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
3496                  matrices from each processor
3497 
3498     Collective on MPI_Comm
3499 
3500    Input Parameters:
3501 +    comm - the communicator the parallel matrix will live on
3502 .    seqmat - the input sequential matrices
3503 .    m - number of local rows (or PETSC_DECIDE)
3504 .    n - number of local columns (or PETSC_DECIDE)
3505 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3506 
3507    Output Parameter:
3508 .    mpimat - the parallel matrix generated
3509 
3510     Level: advanced
3511 
3512    Notes:
3513      The dimensions of the sequential matrix in each processor MUST be the same.
3514      The input seqmat is associated with the container "MatMergeSeqsToMPI", and will be
3515      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
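
     A typical reuse pattern through MatMerge_SeqsToMPI() (a sketch):

.vb
   Mat mpimat;
   MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);  /* symbolic + numeric */
   /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
   MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);    /* numeric phase only */
.ve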
3516 @*/
3517 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3518 {
3519   PetscErrorCode       ierr;
3520   MPI_Comm             comm=mpimat->comm;
3521   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3522   PetscMPIInt          size,rank,taga,*len_s;
3523   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3524   PetscInt             proc,m;
3525   PetscInt             **buf_ri,**buf_rj;
3526   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3527   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3528   MPI_Request          *s_waits,*r_waits;
3529   MPI_Status           *status;
3530   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3531   Mat_Merge_SeqsToMPI  *merge;
3532   PetscContainer       container;
3533 
3534   PetscFunctionBegin;
3535   if (!logkey_seqstompinum) {
3536     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
3537   }
3538   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3539 
3540   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3541   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3542 
3543   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3544   if (container) {
3545     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3546   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix has no MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
3547   bi     = merge->bi;
3548   bj     = merge->bj;
3549   buf_ri = merge->buf_ri;
3550   buf_rj = merge->buf_rj;
3551 
3552   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3553   owners = merge->rowmap.range;
3554   len_s  = merge->len_s;
3555 
3556   /* send and recv matrix values */
3557   /*-----------------------------*/
3558   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3559   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3560 
3561   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3562   for (proc=0,k=0; proc<size; proc++){
3563     if (!len_s[proc]) continue;
3564     i = owners[proc];
3565     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3566     k++;
3567   }
3568 
3569   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3570   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3571   ierr = PetscFree(status);CHKERRQ(ierr);
3572 
3573   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3574   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3575 
3576   /* insert mat values of mpimat */
3577   /*----------------------------*/
3578   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3579   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3580   nextrow = buf_ri_k + merge->nrecv;
3581   nextai  = nextrow + merge->nrecv;
3582 
3583   for (k=0; k<merge->nrecv; k++){
3584     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3585     nrows = *(buf_ri_k[k]);
3586     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3587     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3588   }
3589 
3590   /* set values of ba */
3591   m = merge->rowmap.n;
3592   for (i=0; i<m; i++) {
3593     arow = owners[rank] + i;
3594     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3595     bnzi = bi[i+1] - bi[i];
3596     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3597 
3598     /* add local non-zero vals of this proc's seqmat into ba */
3599     anzi = ai[arow+1] - ai[arow];
3600     aj   = a->j + ai[arow];
3601     aa   = a->a + ai[arow];
3602     nextaj = 0;
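    /* aj[] is, by construction of the symbolic phase, a sorted subsequence of
       bj_i[]; walk j over bj_i[] and accumulate entries where the columns match
       (a single-pass merge of two sorted index lists) */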
3603     for (j=0; nextaj<anzi; j++){
3604       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3605         ba_i[j] += aa[nextaj++];
3606       }
3607     }
3608 
3609     /* add received vals into ba */
3610     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3611       /* i-th row */
3612       if (i == *nextrow[k]) {
3613         anzi = *(nextai[k]+1) - *nextai[k];
3614         aj   = buf_rj[k] + *(nextai[k]);
3615         aa   = abuf_r[k] + *(nextai[k]);
3616         nextaj = 0;
3617         for (j=0; nextaj<anzi; j++){
3618           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3619             ba_i[j] += aa[nextaj++];
3620           }
3621         }
3622         nextrow[k]++; nextai[k]++;
3623       }
3624     }
3625     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3626   }
3627   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3628   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3629 
3630   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3631   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3632   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3633   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3634   PetscFunctionReturn(0);
3635 }
3636 
3637 static PetscEvent logkey_seqstompisym = 0;
3638 #undef __FUNCT__
3639 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3640 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3641 {
3642   PetscErrorCode       ierr;
3643   Mat                  B_mpi;
3644   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3645   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3646   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3647   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3648   PetscInt             len,proc,*dnz,*onz;
3649   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3650   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3651   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3652   MPI_Status           *status;
3653   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3654   PetscBT              lnkbt;
3655   Mat_Merge_SeqsToMPI  *merge;
3656   PetscContainer       container;
3657 
3658   PetscFunctionBegin;
3659   if (!logkey_seqstompisym) {
3660     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3661   }
3662   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3663 
3664   /* make sure it is a PETSc comm */
3665   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3666   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3667   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3668 
3669   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3670   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3671 
3672   /* determine row ownership */
3673   /*---------------------------------------------------------*/
3674   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3675   merge->rowmap.n = m;
3676   merge->rowmap.N = M;
3677   merge->rowmap.bs = 1;
3678   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3679   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3680   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3681 
3682   m      = merge->rowmap.n;
3683   M      = merge->rowmap.N;
3684   owners = merge->rowmap.range;
3685 
3686   /* determine the number of messages to send, their lengths */
3687   /*---------------------------------------------------------*/
3688   len_s  = merge->len_s;
3689 
3690   len = 0;  /* length of buf_si[] */
3691   merge->nsend = 0;
3692   for (proc=0; proc<size; proc++){
3693     len_si[proc] = 0;
3694     if (proc == rank){
3695       len_s[proc] = 0;
3696     } else {
3697       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3698       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3699     }
3700     if (len_s[proc]) {
3701       merge->nsend++;
3702       nrows = 0;
3703       for (i=owners[proc]; i<owners[proc+1]; i++){
3704         if (ai[i+1] > ai[i]) nrows++;
3705       }
3706       len_si[proc] = 2*(nrows+1);
3707       len += len_si[proc];
3708     }
3709   }
3710 
3711   /* determine the number and length of messages to receive for ij-structure */
3712   /*-------------------------------------------------------------------------*/
3713   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3714   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3715 
3716   /* post the Irecv of j-structure */
3717   /*-------------------------------*/
3718   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3719   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3720 
3721   /* post the Isend of j-structure */
3722   /*--------------------------------*/
3723   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3724   sj_waits = si_waits + merge->nsend;
3725 
3726   for (proc=0, k=0; proc<size; proc++){
3727     if (!len_s[proc]) continue;
3728     i = owners[proc];
3729     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3730     k++;
3731   }
3732 
3733   /* receives and sends of j-structure are complete */
3734   /*------------------------------------------------*/
3735   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3736   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3737 
3738   /* send and recv i-structure */
3739   /*---------------------------*/
3740   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3741   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3742 
3743   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3744   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3745   for (proc=0,k=0; proc<size; proc++){
3746     if (!len_s[proc]) continue;
3747     /* form outgoing message for i-structure:
3748          buf_si[0]:                 nrows to be sent
3749                [1:nrows]:           row index (global)
3750                [nrows+1:2*nrows+1]: i-structure index
3751     */
3752     /*-------------------------------------------*/
3753     nrows = len_si[proc]/2 - 1;
3754     buf_si_i    = buf_si + nrows+1;
3755     buf_si[0]   = nrows;
3756     buf_si_i[0] = 0;
3757     nrows = 0;
3758     for (i=owners[proc]; i<owners[proc+1]; i++){
3759       anzi = ai[i+1] - ai[i];
3760       if (anzi) {
3761         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3762         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3763         nrows++;
3764       }
3765     }
3766     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3767     k++;
3768     buf_si += len_si[proc];
3769   }
3770 
3771   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3772   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3773 
3774   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3775   for (i=0; i<merge->nrecv; i++){
3776     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3777   }
3778 
3779   ierr = PetscFree(len_si);CHKERRQ(ierr);
3780   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3781   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3782   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3783   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3784   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3785   ierr = PetscFree(status);CHKERRQ(ierr);
3786 
3787   /* compute a local seq matrix in each processor */
3788   /*----------------------------------------------*/
3789   /* allocate bi array and free space for accumulating nonzero column info */
3790   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3791   bi[0] = 0;
3792 
3793   /* create and initialize a linked list */
3794   nlnk = N+1;
3795   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3796 
3797   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3799   len  = ai[owners[rank+1]] - ai[owners[rank]];
3800   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3801   current_space = free_space;
3802 
3803   /* determine symbolic info for each local row */
3804   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3805   nextrow = buf_ri_k + merge->nrecv;
3806   nextai  = nextrow + merge->nrecv;
3807   for (k=0; k<merge->nrecv; k++){
3808     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3809     nrows = *buf_ri_k[k];
3810     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3811     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3812   }
3813 
3814   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3815   len = 0;
3816   for (i=0;i<m;i++) {
3817     bnzi   = 0;
3818     /* add local non-zero cols of this proc's seqmat into lnk */
3819     arow   = owners[rank] + i;
3820     anzi   = ai[arow+1] - ai[arow];
3821     aj     = a->j + ai[arow];
3822     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3823     bnzi += nlnk;
3824     /* add received col data into lnk */
3825     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3826       if (i == *nextrow[k]) { /* i-th row */
3827         anzi = *(nextai[k]+1) - *nextai[k];
3828         aj   = buf_rj[k] + *nextai[k];
3829         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3830         bnzi += nlnk;
3831         nextrow[k]++; nextai[k]++;
3832       }
3833     }
3834     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3835 
3836     /* if free space is not available, make more free space */
3837     if (current_space->local_remaining<bnzi) {
3838       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3839       nspacedouble++;
3840     }
3841     /* copy data into free space, then initialize lnk */
3842     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3843     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3844 
3845     current_space->array           += bnzi;
3846     current_space->local_used      += bnzi;
3847     current_space->local_remaining -= bnzi;
3848 
3849     bi[i+1] = bi[i] + bnzi;
3850   }
3851 
3852   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3853 
3854   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3855   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3856   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3857 
3858   /* create symbolic parallel matrix B_mpi */
3859   /*---------------------------------------*/
3860   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3861   if (n==PETSC_DECIDE) {
3862     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3863   } else {
3864     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3865   }
3866   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3867   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3868   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3869 
3870   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3871   B_mpi->assembled     = PETSC_FALSE;
3872   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3873   merge->bi            = bi;
3874   merge->bj            = bj;
3875   merge->buf_ri        = buf_ri;
3876   merge->buf_rj        = buf_rj;
3877   merge->coi           = PETSC_NULL;
3878   merge->coj           = PETSC_NULL;
3879   merge->owners_co     = PETSC_NULL;
3880 
3881   /* attach the supporting struct to B_mpi for reuse */
3882   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3883   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3884   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3885   *mpimat = B_mpi;
3886 
3887   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3888   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3889   PetscFunctionReturn(0);
3890 }
3891 
3892 static PetscEvent logkey_seqstompi = 0;
3893 #undef __FUNCT__
3894 #define __FUNCT__ "MatMerge_SeqsToMPI"
3895 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3896 {
3897   PetscErrorCode   ierr;
3898 
3899   PetscFunctionBegin;
3900   if (!logkey_seqstompi) {
3901     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3902   }
3903   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3904   if (scall == MAT_INITIAL_MATRIX){
3905     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3906   }
3907   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3908   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3909   PetscFunctionReturn(0);
3910 }
3911 static PetscEvent logkey_getlocalmat = 0;
3912 #undef __FUNCT__
3913 #define __FUNCT__ "MatGetLocalMat"
3914 /*@C
3915      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of A
3916 
3917     Not Collective
3918 
3919    Input Parameters:
3920 +    A - the matrix
3921 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3922 
3923    Output Parameter:
3924 .    A_loc - the local sequential matrix generated
3925 
3926     Level: developer
3927 
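   Example (a sketch):

.vb
   Mat Aloc;
   MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
   /* ... after A's values change (same nonzero pattern), refresh the copy ... */
   MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
   MatDestroy(Aloc);
.ve
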
3928 @*/
3929 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3930 {
3931   PetscErrorCode  ierr;
3932   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3933   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3934   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3935   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3936   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3937   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3938 
3939   PetscFunctionBegin;
3940   if (!logkey_getlocalmat) {
3941     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3942   }
3943   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3944   if (scall == MAT_INITIAL_MATRIX){
3945     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3946     ci[0] = 0;
3947     for (i=0; i<am; i++){
3948       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3949     }
3950     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3951     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3952     k = 0;
3953     for (i=0; i<am; i++) {
3954       ncols_o = bi[i+1] - bi[i];
3955       ncols_d = ai[i+1] - ai[i];
3956       /* off-diagonal portion of A */
3957       for (jo=0; jo<ncols_o; jo++) {
3958         col = cmap[*bj];
3959         if (col >= cstart) break;
3960         cj[k]   = col; bj++;
3961         ca[k++] = *ba++;
3962       }
3963       /* diagonal portion of A */
3964       for (j=0; j<ncols_d; j++) {
3965         cj[k]   = cstart + *aj++;
3966         ca[k++] = *aa++;
3967       }
3968       /* off-diagonal portion of A */
3969       for (j=jo; j<ncols_o; j++) {
3970         cj[k]   = cmap[*bj++];
3971         ca[k++] = *ba++;
3972       }
3973     }
3974     /* put together the new matrix */
3975     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3976     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3977     /* Since these are PETSc arrays, change flags to free them as necessary. */
3978     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3979     mat->free_a  = PETSC_TRUE;
3980     mat->free_ij = PETSC_TRUE;
3981     mat->nonew   = 0;
3982   } else if (scall == MAT_REUSE_MATRIX){
3983     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3984     ci = mat->i; cj = mat->j; ca = mat->a;
3985     for (i=0; i<am; i++) {
3986       /* off-diagonal portion of A */
3987       ncols_o = bi[i+1] - bi[i];
3988       for (jo=0; jo<ncols_o; jo++) {
3989         col = cmap[*bj];
3990         if (col >= cstart) break;
3991         *ca++ = *ba++; bj++;
3992       }
3993       /* diagonal portion of A */
3994       ncols_d = ai[i+1] - ai[i];
3995       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3996       /* off-diagonal portion of A */
3997       for (j=jo; j<ncols_o; j++) {
3998         *ca++ = *ba++; bj++;
3999       }
4000     }
4001   } else {
4002     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4003   }
4004 
4005   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4006   PetscFunctionReturn(0);
4007 }
4008 
4009 static PetscEvent logkey_getlocalmatcondensed = 0;
4010 #undef __FUNCT__
4011 #define __FUNCT__ "MatGetLocalMatCondensed"
4012 /*@C
4013      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A and its NON-ZERO columns
4014 
4015     Not Collective
4016 
4017    Input Parameters:
4018 +    A - the matrix
4019 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4020 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4021 
4022    Output Parameter:
4023 .    A_loc - the local sequential matrix generated
4024 
4025     Level: developer
4026 
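   Example (a sketch; passing PETSC_NULL lets the routine build the row and
   column index sets itself):

.vb
   Mat Aloc;
   MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);
.ve
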
4027 @*/
4028 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4029 {
4030   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4031   PetscErrorCode    ierr;
4032   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4033   IS                isrowa,iscola;
4034   Mat               *aloc;
4035 
4036   PetscFunctionBegin;
4037   if (!logkey_getlocalmatcondensed) {
4038     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
4039   }
4040   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4041   if (!row){
4042     start = A->rmap.rstart; end = A->rmap.rend;
4043     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4044   } else {
4045     isrowa = *row;
4046   }
4047   if (!col){
4048     start = A->cmap.rstart;
4049     cmap  = a->garray;
4050     nzA   = a->A->cmap.n;
4051     nzB   = a->B->cmap.n;
4052     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4053     ncols = 0;
4054     for (i=0; i<nzB; i++) {
4055       if (cmap[i] < start) idx[ncols++] = cmap[i];
4056       else break;
4057     }
4058     imark = i;
4059     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4060     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4061     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4062     ierr = PetscFree(idx);CHKERRQ(ierr);
4063   } else {
4064     iscola = *col;
4065   }
4066   if (scall != MAT_INITIAL_MATRIX){
4067     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4068     aloc[0] = *A_loc;
4069   }
4070   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4071   *A_loc = aloc[0];
4072   ierr = PetscFree(aloc);CHKERRQ(ierr);
4073   if (!row){
4074     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4075   }
4076   if (!col){
4077     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4078   }
4079   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4080   PetscFunctionReturn(0);
4081 }
4082 
4083 static PetscEvent logkey_GetBrowsOfAcols = 0;
4084 #undef __FUNCT__
4085 #define __FUNCT__ "MatGetBrowsOfAcols"
4086 /*@C
4087     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4088 
4089     Collective on Mat
4090 
4091    Input Parameters:
4092 +    A,B - the matrices in mpiaij format
4093 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4094 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4095 
4096    Output Parameter:
4097 +    rowb, colb - index sets of rows and columns of B to extract
4098 .    brstart - row index of B_seq at which the B->rmap.n rows taken from B's local rows begin
4099 -    B_seq - the sequential matrix generated
4100 
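   Example usage (a minimal sketch, assuming A and B are assembled MPIAIJ
   matrices with compatible local dimensions):
.vb
   IS       rowb = PETSC_NULL,colb = PETSC_NULL;
   PetscInt brstart;
   Mat      B_seq;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
   /* ... after B changes numerically, reuse rowb and colb ... */
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(rowb);CHKERRQ(ierr);
   ierr = ISDestroy(colb);CHKERRQ(ierr);
   ierr = MatDestroy(B_seq);CHKERRQ(ierr);
.ve
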
4101     Level: developer
4102 
4103 @*/
4104 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4105 {
4106   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4107   PetscErrorCode    ierr;
4108   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4109   IS                isrowb,iscolb;
4110   Mat               *bseq;
4111 
4112   PetscFunctionBegin;
4113   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4114     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4115   }
4116   if (!logkey_GetBrowsOfAcols) {
4117     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
4118   }
4119   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4120 
4121   if (scall == MAT_INITIAL_MATRIX){
4122     start = A->cmap.rstart;
4123     cmap  = a->garray;
4124     nzA   = a->A->cmap.n;
4125     nzB   = a->B->cmap.n;
4126     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4127     ncols = 0;
4128     for (i=0; i<nzB; i++) {  /* global cols of A (rows of B) below the local range */
4129       if (cmap[i] < start) idx[ncols++] = cmap[i];
4130       else break;
4131     }
4132     imark = i;
4133     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4134     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global cols of A (rows of B) above the local range */
4135     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4136     ierr = PetscFree(idx);CHKERRQ(ierr);
4137     *brstart = imark;
4138     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4139   } else {
4140     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4141     isrowb = *rowb; iscolb = *colb;
4142     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4143     bseq[0] = *B_seq;
4144   }
4145   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4146   *B_seq = bseq[0];
4147   ierr = PetscFree(bseq);CHKERRQ(ierr);
4148   if (!rowb){
4149     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4150   } else {
4151     *rowb = isrowb;
4152   }
4153   if (!colb){
4154     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4155   } else {
4156     *colb = iscolb;
4157   }
4158   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4159   PetscFunctionReturn(0);
4160 }
4161 
4162 static PetscEvent logkey_GetBrowsOfAocols = 0;
4163 #undef __FUNCT__
4164 #define __FUNCT__ "MatGetBrowsOfAoCols"
4165 /*@C
4166     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4167     of the OFF-DIAGONAL portion of local A
4168 
4169     Collective on Mat
4170 
4171    Input Parameters:
4172 +    A,B - the matrices in mpiaij format
4173 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4174 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4175 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4176 
4177    Output Parameter:
4178 .    B_oth - the sequential matrix generated
4179 
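   Example usage (a minimal sketch; startsj and bufa returned by the first call
   let the MAT_REUSE_MATRIX call skip the symbolic communication setup):
.vb
   PetscInt    *startsj = PETSC_NULL;
   PetscScalar *bufa    = PETSC_NULL;
   Mat         B_oth;
   ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
   /* ... after B changes numerically (same nonzero pattern) ... */
   ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
   ierr = PetscFree(startsj);CHKERRQ(ierr);
   ierr = PetscFree(bufa);CHKERRQ(ierr);
   ierr = MatDestroy(B_oth);CHKERRQ(ierr);
.ve
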
4180     Level: developer
4181 
4182 @*/
4183 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4184 {
4185   VecScatter_MPI_General *gen_to,*gen_from;
4186   PetscErrorCode         ierr;
4187   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4188   Mat_SeqAIJ             *b_oth;
4189   VecScatter             ctx=a->Mvctx;
4190   MPI_Comm               comm=ctx->comm;
4191   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4192   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4193   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4194   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4195   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4196   MPI_Status             *sstatus,rstatus;
4197   PetscMPIInt            jj;
4198   PetscInt               *cols,sbs,rbs;
4199   PetscScalar            *vals;
4200 
4201   PetscFunctionBegin;
4202   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4203     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4204   }
4205   if (!logkey_GetBrowsOfAocols) {
4206     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
4207   }
4208   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4209   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4210 
4211   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4212   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4213   rvalues  = gen_from->values; /* will hold the lengths of the rows to be received */
4214   svalues  = gen_to->values;   /* will hold the lengths of the rows to be sent */
4215   nrecvs   = gen_from->n;
4216   nsends   = gen_to->n;
4217 
4218   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4219   srow     = gen_to->indices;   /* local row index to be sent */
4220   sstarts  = gen_to->starts;
4221   sprocs   = gen_to->procs;
4222   sstatus  = gen_to->sstatus;
4223   sbs      = gen_to->bs;
4224   rstarts  = gen_from->starts;
4225   rprocs   = gen_from->procs;
4226   rbs      = gen_from->bs;
4227 
4228   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4229   if (scall == MAT_INITIAL_MATRIX){
4230     /* i-array */
4231     /*---------*/
4232     /*  post receives */
4233     for (i=0; i<nrecvs; i++){
4234       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4235       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4236       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4237     }
4238 
4239     /* pack the outgoing message */
4240     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4241     rstartsj = sstartsj + nsends +1;
4242     sstartsj[0] = 0;  rstartsj[0] = 0;
4243     len = 0; /* total length of j or a array to be sent */
4244     k = 0;
4245     for (i=0; i<nsends; i++){
4246       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4247       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4248       for (j=0; j<nrows; j++) {
4249         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4250         for (l=0; l<sbs; l++){
4251           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4252           rowlen[j*sbs+l] = ncols;
4253           len += ncols;
4254           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4255         }
4256         k++;
4257       }
4258       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4259       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4260     }
4261     /* recvs and sends of i-array are completed */
4262     i = nrecvs;
4263     while (i--) {
4264       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4265     }
4266     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4267 
4268     /* allocate buffers for sending j and a arrays */
4269     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4270     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4271 
4272     /* create i-array of B_oth */
4273     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4274     b_othi[0] = 0;
4275     len = 0; /* total length of j or a array to be received */
4276     k = 0;
4277     for (i=0; i<nrecvs; i++){
4278       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4279       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4280       for (j=0; j<nrows; j++) {
4281         b_othi[k+1] = b_othi[k] + rowlen[j];
4282         len += rowlen[j]; k++;
4283       }
4284       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4285     }
4286 
4287     /* allocate space for j and a arrays of B_oth */
4288     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4289     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4290 
4291     /* j-array */
4292     /*---------*/
4293     /*  post receives of j-array */
4294     for (i=0; i<nrecvs; i++){
4295       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4296       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4297     }
4298 
4299     /* pack the outgoing message j-array */
4300     k = 0;
4301     for (i=0; i<nsends; i++){
4302       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4303       bufJ = bufj+sstartsj[i];
4304       for (j=0; j<nrows; j++) {
4305         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4306         for (ll=0; ll<sbs; ll++){
4307           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4308           for (l=0; l<ncols; l++){
4309             *bufJ++ = cols[l];
4310           }
4311           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4312         }
4313       }
4314       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4315     }
4316 
4317     /* recvs and sends of j-array are completed */
4318     i = nrecvs;
4319     while (i--) {
4320       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4321     }
4322     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4323   } else if (scall == MAT_REUSE_MATRIX){
4324     sstartsj = *startsj;
4325     rstartsj = sstartsj + nsends +1;
4326     bufa     = *bufa_ptr;
4327     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4328     b_otha   = b_oth->a;
4329   } else {
4330     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4331   }
4332 
4333   /* a-array */
4334   /*---------*/
4335   /*  post receives of a-array */
4336   for (i=0; i<nrecvs; i++){
4337     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4338     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4339   }
4340 
4341   /* pack the outgoing message a-array */
4342   k = 0;
4343   for (i=0; i<nsends; i++){
4344     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4345     bufA = bufa+sstartsj[i];
4346     for (j=0; j<nrows; j++) {
4347       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4348       for (ll=0; ll<sbs; ll++){
4349         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4350         for (l=0; l<ncols; l++){
4351           *bufA++ = vals[l];
4352         }
4353         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4354       }
4355     }
4356     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4357   }
4358   /* recvs and sends of a-array are completed */
4359   i = nrecvs;
4360   while (i--) {
4361     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4362   }
4363   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4364   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4365 
4366   if (scall == MAT_INITIAL_MATRIX){
4367     /* put together the new matrix */
4368     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4369 
4370     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4371     /* Since these are PETSc arrays, change flags to free them as necessary. */
4372     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4373     b_oth->free_a  = PETSC_TRUE;
4374     b_oth->free_ij = PETSC_TRUE;
4375     b_oth->nonew   = 0;
4376 
4377     ierr = PetscFree(bufj);CHKERRQ(ierr);
4378     if (!startsj || !bufa_ptr){
4379       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4380       ierr = PetscFree(bufa);CHKERRQ(ierr);
4381     } else {
4382       *startsj  = sstartsj;
4383       *bufa_ptr = bufa;
4384     }
4385   }
4386   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4387   PetscFunctionReturn(0);
4388 }
4389 
4390 #undef __FUNCT__
4391 #define __FUNCT__ "MatGetCommunicationStructs"
4392 /*@C
4393   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4394 
4395   Not Collective
4396 
4397   Input Parameters:
4398 . A - The matrix in mpiaij format
4399 
4400   Output Parameter:
4401 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4402 . colmap - A map from global column index to local index into lvec
4403 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4404 
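  Example usage (a minimal sketch; the type of colmap depends on whether
  PETSC_USE_CTABLE is defined, matching the prototypes below):
.vb
  Vec        lvec;
  VecScatter scatter;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
  /* lvec, colmap, and scatter are owned by A; do not destroy them */
.ve
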
4405   Level: developer
4406 
4407 @*/
4408 #if defined (PETSC_USE_CTABLE)
4409 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4410 #else
4411 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4412 #endif
4413 {
4414   Mat_MPIAIJ *a;
4415 
4416   PetscFunctionBegin;
4417   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4418   PetscValidPointer(lvec, 2);
4419   PetscValidPointer(colmap, 3);
4420   PetscValidPointer(multScatter, 4);
4421   a = (Mat_MPIAIJ *) A->data;
4422   if (lvec) *lvec = a->lvec;
4423   if (colmap) *colmap = a->colmap;
4424   if (multScatter) *multScatter = a->Mvctx;
4425   PetscFunctionReturn(0);
4426 }
4427 
4428 EXTERN_C_BEGIN
4429 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4430 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4431 EXTERN_C_END
4432 
4433 /*MC
4434    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4435 
4436    Options Database Keys:
4437 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4438 
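  Example usage (a sketch of run-time type selection; A, comm, and the sizes
  are assumed to be declared by the caller):
.vb
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr); /* -mat_type mpiaij selects this type */
.ve
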
4439   Level: beginner
4440 
4441 .seealso: MatCreateMPIAIJ
4442 M*/
4443 
4444 EXTERN_C_BEGIN
4445 #undef __FUNCT__
4446 #define __FUNCT__ "MatCreate_MPIAIJ"
4447 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4448 {
4449   Mat_MPIAIJ     *b;
4450   PetscErrorCode ierr;
4451   PetscMPIInt    size;
4452 
4453   PetscFunctionBegin;
4454   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4455 
4456   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4457   B->data         = (void*)b;
4458   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4459   B->factor       = 0;
4460   B->rmap.bs      = 1;
4461   B->assembled    = PETSC_FALSE;
4462   B->mapping      = 0;
4463 
4464   B->insertmode      = NOT_SET_VALUES;
4465   b->size            = size;
4466   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4467 
4468   /* build cache for off array entries formed */
4469   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4470   b->donotstash  = PETSC_FALSE;
4471   b->colmap      = 0;
4472   b->garray      = 0;
4473   b->roworiented = PETSC_TRUE;
4474 
4475   /* stuff used for matrix vector multiply */
4476   b->lvec      = PETSC_NULL;
4477   b->Mvctx     = PETSC_NULL;
4478 
4479   /* stuff for MatGetRow() */
4480   b->rowindices   = 0;
4481   b->rowvalues    = 0;
4482   b->getrowactive = PETSC_FALSE;
4483 
4484 
4485   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4486                                      "MatStoreValues_MPIAIJ",
4487                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4488   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4489                                      "MatRetrieveValues_MPIAIJ",
4490                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4492 				     "MatGetDiagonalBlock_MPIAIJ",
4493                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4494   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4495 				     "MatIsTranspose_MPIAIJ",
4496 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4497   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4498 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4499 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4500   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4501 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4502 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4503   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4504 				     "MatDiagonalScaleLocal_MPIAIJ",
4505 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4507                                      "MatConvert_MPIAIJ_MPICSRPERM",
4508                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4510                                      "MatConvert_MPIAIJ_MPICRL",
4511                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4512   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4513   PetscFunctionReturn(0);
4514 }
4515 EXTERN_C_END
4516 
4517 #undef __FUNCT__
4518 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4519 /*@C
4520      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
4521          and "off-diagonal" parts of the matrix in CSR format.
4522 
4523    Collective on MPI_Comm
4524 
4525    Input Parameters:
4526 +  comm - MPI communicator
4527 .  m - number of local rows (Cannot be PETSC_DECIDE)
4528 .  n - This value should be the same as the local size used in creating the
4529        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4530        it calculated if N is given). For square matrices n is almost always m.
4531 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4532 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4533 .   i - row indices for "diagonal" portion of matrix
4534 .   j - column indices
4535 .   a - matrix values
4536 .   oi - row indices for "off-diagonal" portion of matrix
4537 .   oj - column indices
4538 -   oa - matrix values
4539 
4540    Output Parameter:
4541 .   mat - the matrix
4542 
4543    Level: advanced
4544 
4545    Notes:
4546        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4547 
4548        The i and j indices are 0 based
4549 
4550        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4551 
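       Example usage (a minimal two-process sketch, assuming rank holds the result of
       MPI_Comm_rank(); each process owns one row of the 2x2 matrix [1 2; 3 4], so
       m = n = 1, the "diagonal" block is 1x1, and the single off-diagonal entry is
       stored with its GLOBAL column index in oj; the arrays must stay valid for the
       life of the matrix since they are not copied):
.vb
   PetscInt    i[2]  = {0,1},j[1],oi[2] = {0,1},oj[1];
   PetscScalar a[1],oa[1];
   Mat         mat;
   j[0]  = 0;                       /* local col index within the 1x1 diagonal block */
   oj[0] = rank ? 0 : 1;            /* GLOBAL col index of the off-diagonal entry */
   a[0]  = rank ? 4.0 : 1.0;
   oa[0] = rank ? 3.0 : 2.0;
   ierr  = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve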
4552 
4553 .keywords: matrix, aij, compressed row, sparse, parallel
4554 
4555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4556           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4557 @*/
4558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4559 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4560 {
4561   PetscErrorCode ierr;
4562   Mat_MPIAIJ     *maij;
4563 
4564   PetscFunctionBegin;
4565   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4566   if (i[0]) {
4567     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4568   }
4569   if (oi[0]) {
4570     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4571   }
4572   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4573   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4574   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4575   maij = (Mat_MPIAIJ*) (*mat)->data;
4576   maij->donotstash     = PETSC_TRUE;
4577   (*mat)->preallocated = PETSC_TRUE;
4578 
4579   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4580   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4581   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4582 
4583   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4584   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4585 
4586   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4587   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4588   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4589   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4590 
4591   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4592   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4593   PetscFunctionReturn(0);
4594 }
4595 
4596 /*
4597     Special version for direct calls from Fortran
4598 */
4599 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4600 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4601 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4602 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4603 #endif
4604 
4605 /* Redefine these macros so that they can be used in a void function */
4606 #undef CHKERRQ
4607 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4608 #undef SETERRQ2
4609 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4610 #undef SETERRQ
4611 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4612 
4613 EXTERN_C_BEGIN
4614 #undef __FUNCT__
4615 #define __FUNCT__ "matsetvaluesmpiaij_"
4616 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4617 {
4618   Mat            mat = *mmat;
4619   PetscInt       m = *mm, n = *mn;
4620   InsertMode     addv = *maddv;
4621   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4622   PetscScalar    value;
4623   PetscErrorCode ierr;
4624 
4625   MatPreallocated(mat);
4626   if (mat->insertmode == NOT_SET_VALUES) {
4627     mat->insertmode = addv;
4628   }
4629 #if defined(PETSC_USE_DEBUG)
4630   else if (mat->insertmode != addv) {
4631     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4632   }
4633 #endif
4634   {
4635   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4636   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4637   PetscTruth     roworiented = aij->roworiented;
4638 
4639   /* Some Variables required in the macro */
4640   Mat            A = aij->A;
4641   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4642   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4643   PetscScalar    *aa = a->a;
4644   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4645   Mat            B = aij->B;
4646   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4647   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4648   PetscScalar    *ba = b->a;
4649 
4650   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4651   PetscInt       nonew = a->nonew;
4652   PetscScalar    *ap1,*ap2;
4653 
4654   PetscFunctionBegin;
4655   for (i=0; i<m; i++) {
4656     if (im[i] < 0) continue;
4657 #if defined(PETSC_USE_DEBUG)
4658     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4659 #endif
4660     if (im[i] >= rstart && im[i] < rend) {
4661       row      = im[i] - rstart;
4662       lastcol1 = -1;
4663       rp1      = aj + ai[row];
4664       ap1      = aa + ai[row];
4665       rmax1    = aimax[row];
4666       nrow1    = ailen[row];
4667       low1     = 0;
4668       high1    = nrow1;
4669       lastcol2 = -1;
4670       rp2      = bj + bi[row];
4671       ap2      = ba + bi[row];
4672       rmax2    = bimax[row];
4673       nrow2    = bilen[row];
4674       low2     = 0;
4675       high2    = nrow2;
4676 
4677       for (j=0; j<n; j++) {
4678         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4679         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4680         if (in[j] >= cstart && in[j] < cend){
4681           col = in[j] - cstart;
4682           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4683         } else if (in[j] < 0) continue;
4684 #if defined(PETSC_USE_DEBUG)
4685         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4686 #endif
4687         else {
4688           if (mat->was_assembled) {
4689             if (!aij->colmap) {
4690               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4691             }
4692 #if defined (PETSC_USE_CTABLE)
4693             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4694 	    col--;
4695 #else
4696             col = aij->colmap[in[j]] - 1;
4697 #endif
4698             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4699               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4700               col =  in[j];
4701               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4702               B = aij->B;
4703               b = (Mat_SeqAIJ*)B->data;
4704               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4705               rp2      = bj + bi[row];
4706               ap2      = ba + bi[row];
4707               rmax2    = bimax[row];
4708               nrow2    = bilen[row];
4709               low2     = 0;
4710               high2    = nrow2;
4711               bm       = aij->B->rmap.n;
4712               ba = b->a;
4713             }
4714           } else col = in[j];
4715           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4716         }
4717       }
4718     } else {
4719       if (!aij->donotstash) {
4720         if (roworiented) {
4721           if (ignorezeroentries && v[i*n] == 0.0) continue;
4722           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4723         } else {
4724           if (ignorezeroentries && v[i] == 0.0) continue;
4725           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4726         }
4727       }
4728     }
4729   }}
4730   PetscFunctionReturnVoid();
4731 }
4732 EXTERN_C_END
4733 
4734