xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 03bc72f194ef8c2a2b9697e0a55e5a593161e903)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
11 has an order N integer array but is fast to acess.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = a->ignorezeroentries;
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col --;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled, if so we must
346      also disassemble ourselfs, in order that we may reassemble. */
347   /*
348      if nonzero structure of submatrix B cannot change then we know that
349      no processor disassembled thus we can skip this stuff
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal we use separate
502      code so that MatSetValues() is not called for each diagonal allocating
503      new memory, thus calling lots of mallocs and slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added in yy until the next line, */
594     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
600     /* values actually were received in the Begin() but we need to call this nop */
601     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* )th processor needs space a largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
903       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
904       PetscFunctionReturn(0);
905     } else if (format == PETSC_VIEWER_ASCII_INFO) {
906       PetscInt   inodecount,inodelimit,*inodes;
907       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
908       if (inodes) {
909         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
910       } else {
911         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
912       }
913       PetscFunctionReturn(0);
914     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915       PetscFunctionReturn(0);
916     }
917   } else if (isbinary) {
918     if (size == 1) {
919       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
920       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
921     } else {
922       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
923     }
924     PetscFunctionReturn(0);
925   } else if (isdraw) {
926     PetscDraw  draw;
927     PetscTruth isnull;
928     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
929     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
930   }
931 
932   if (size == 1) {
933     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
934     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935   } else {
936     /* assemble the entire matrix onto first processor. */
937     Mat         A;
938     Mat_SeqAIJ  *Aloc;
939     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940     PetscScalar *a;
941 
942     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
943     if (!rank) {
944       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
945     } else {
946       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
947     }
948     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
950     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
951     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
952 
953     /* copy over the A part */
954     Aloc = (Mat_SeqAIJ*)aij->A->data;
955     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956     row = mat->rmap.rstart;
957     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
958     for (i=0; i<m; i++) {
959       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
960       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961     }
962     aj = Aloc->j;
963     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
964 
965     /* copy over the B part */
966     Aloc = (Mat_SeqAIJ*)aij->B->data;
967     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968     row  = mat->rmap.rstart;
969     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
970     ct   = cols;
971     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972     for (i=0; i<m; i++) {
973       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
974       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975     }
976     ierr = PetscFree(ct);CHKERRQ(ierr);
977     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979     /*
980        Everyone has to call to draw the matrix since the graphics waits are
981        synchronized across all processors that share the PetscDraw object
982     */
983     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
984     if (!rank) {
985       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
986       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
987     }
988     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
989     ierr = MatDestroy(A);CHKERRQ(ierr);
990   }
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth     iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is so much easier;
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1135   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NEW_NONZERO_LOCATIONS:
1207   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1208   case MAT_KEEP_ZEROED_ROWS:
1209   case MAT_NEW_NONZERO_LOCATION_ERR:
1210   case MAT_USE_INODES:
1211   case MAT_IGNORE_ZERO_ENTRIES:
1212     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1213     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1214     break;
1215   case MAT_ROW_ORIENTED:
1216     a->roworiented = flg;
1217     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1218     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1219     break;
1220   case MAT_NEW_DIAGONALS:
1221     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1222     break;
1223   case MAT_IGNORE_OFF_PROC_ENTRIES:
1224     a->donotstash = PETSC_TRUE;
1225     break;
1226   case MAT_SYMMETRIC:
1227     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1228     break;
1229   case MAT_STRUCTURALLY_SYMMETRIC:
1230   case MAT_HERMITIAN:
1231   case MAT_SYMMETRY_ETERNAL:
1232     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1233     break;
1234   default:
1235     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1236   }
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatGetRow_MPIAIJ"
1242 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1243 {
1244   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1245   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1246   PetscErrorCode ierr;
1247   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1248   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1249   PetscInt       *cmap,*idx_p;
1250 
1251   PetscFunctionBegin;
1252   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1253   mat->getrowactive = PETSC_TRUE;
1254 
1255   if (!mat->rowvalues && (idx || v)) {
1256     /*
1257         allocate enough space to hold information from the longest row.
1258     */
1259     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1260     PetscInt     max = 1,tmp;
1261     for (i=0; i<matin->rmap.n; i++) {
1262       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1263       if (max < tmp) { max = tmp; }
1264     }
1265     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1266     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1267   }
1268 
1269   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1270   lrow = row - rstart;
1271 
1272   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1273   if (!v)   {pvA = 0; pvB = 0;}
1274   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1275   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1276   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1277   nztot = nzA + nzB;
1278 
1279   cmap  = mat->garray;
1280   if (v  || idx) {
1281     if (nztot) {
1282       /* Sort by increasing column numbers, assuming A and B already sorted */
1283       PetscInt imark = -1;
1284       if (v) {
1285         *v = v_p = mat->rowvalues;
1286         for (i=0; i<nzB; i++) {
1287           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1288           else break;
1289         }
1290         imark = i;
1291         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1292         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1293       }
1294       if (idx) {
1295         *idx = idx_p = mat->rowindices;
1296         if (imark > -1) {
1297           for (i=0; i<imark; i++) {
1298             idx_p[i] = cmap[cworkB[i]];
1299           }
1300         } else {
1301           for (i=0; i<nzB; i++) {
1302             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1303             else break;
1304           }
1305           imark = i;
1306         }
1307         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1308         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1309       }
1310     } else {
1311       if (idx) *idx = 0;
1312       if (v)   *v   = 0;
1313     }
1314   }
1315   *nz = nztot;
1316   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1317   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1323 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1324 {
1325   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1326 
1327   PetscFunctionBegin;
1328   if (!aij->getrowactive) {
1329     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1330   }
1331   aij->getrowactive = PETSC_FALSE;
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatNorm_MPIAIJ"
1337 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1338 {
1339   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1340   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1341   PetscErrorCode ierr;
1342   PetscInt       i,j,cstart = mat->cmap.rstart;
1343   PetscReal      sum = 0.0;
1344   PetscScalar    *v;
1345 
1346   PetscFunctionBegin;
1347   if (aij->size == 1) {
1348     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1349   } else {
1350     if (type == NORM_FROBENIUS) {
1351       v = amat->a;
1352       for (i=0; i<amat->nz; i++) {
1353 #if defined(PETSC_USE_COMPLEX)
1354         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1355 #else
1356         sum += (*v)*(*v); v++;
1357 #endif
1358       }
1359       v = bmat->a;
1360       for (i=0; i<bmat->nz; i++) {
1361 #if defined(PETSC_USE_COMPLEX)
1362         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1363 #else
1364         sum += (*v)*(*v); v++;
1365 #endif
1366       }
1367       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1368       *norm = sqrt(*norm);
1369     } else if (type == NORM_1) { /* max column norm */
1370       PetscReal *tmp,*tmp2;
1371       PetscInt    *jj,*garray = aij->garray;
1372       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1373       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1374       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1375       *norm = 0.0;
1376       v = amat->a; jj = amat->j;
1377       for (j=0; j<amat->nz; j++) {
1378         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1379       }
1380       v = bmat->a; jj = bmat->j;
1381       for (j=0; j<bmat->nz; j++) {
1382         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1383       }
1384       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1385       for (j=0; j<mat->cmap.N; j++) {
1386         if (tmp2[j] > *norm) *norm = tmp2[j];
1387       }
1388       ierr = PetscFree(tmp);CHKERRQ(ierr);
1389       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1390     } else if (type == NORM_INFINITY) { /* max row norm */
1391       PetscReal ntemp = 0.0;
1392       for (j=0; j<aij->A->rmap.n; j++) {
1393         v = amat->a + amat->i[j];
1394         sum = 0.0;
1395         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1396           sum += PetscAbsScalar(*v); v++;
1397         }
1398         v = bmat->a + bmat->i[j];
1399         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1400           sum += PetscAbsScalar(*v); v++;
1401         }
1402         if (sum > ntemp) ntemp = sum;
1403       }
1404       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1405     } else {
1406       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1407     }
1408   }
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatTranspose_MPIAIJ"
1414 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1415 {
1416   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1417   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1418   PetscErrorCode ierr;
1419   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1420   PetscInt       cstart=A->cmap.rstart,ncol;
1421   Mat            B;
1422   PetscScalar    *array;
1423 
1424   PetscFunctionBegin;
1425   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1426 
1427   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1428   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1429   ai = Aloc->i; aj = Aloc->j;
1430   bi = Bloc->i; bj = Bloc->j;
1431   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1432   cols = d_nnz + na + 1; /* work space to be used by B part */
1433   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1434   for (i=0; i<ai[ma]; i++){
1435     d_nnz[aj[i]] ++;
1436     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1437   }
1438 
1439   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1440   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1441   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1442   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1443 
1444   /* copy over the A part */
1445   array = Aloc->a;
1446   row = A->rmap.rstart;
1447   for (i=0; i<ma; i++) {
1448     ncol = ai[i+1]-ai[i];
1449     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1450     row++; array += ncol; aj += ncol;
1451   }
1452   aj = Aloc->j;
1453   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1454 
1455   /* copy over the B part */
1456   array = Bloc->a;
1457   row = A->rmap.rstart;
1458   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1459   for (i=0; i<mb; i++) {
1460     ncol = bi[i+1]-bi[i];
1461     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1462     row++; array += ncol; cols += ncol;
1463   }
1464   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1465   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1466   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1467   if (matout) {
1468     *matout = B;
1469   } else {
1470     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1477 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1478 {
1479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1480   Mat            a = aij->A,b = aij->B;
1481   PetscErrorCode ierr;
1482   PetscInt       s1,s2,s3;
1483 
1484   PetscFunctionBegin;
1485   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1486   if (rr) {
1487     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1488     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1489     /* Overlap communication with computation. */
1490     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491   }
1492   if (ll) {
1493     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1494     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1495     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1496   }
1497   /* scale  the diagonal block */
1498   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1499 
1500   if (rr) {
1501     /* Do a scatter end and then right scale the off-diagonal block */
1502     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1504   }
1505 
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1511 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1512 {
1513   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1514   PetscErrorCode ierr;
1515 
1516   PetscFunctionBegin;
1517   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1518   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1523 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1524 {
1525   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1526   PetscErrorCode ierr;
1527 
1528   PetscFunctionBegin;
1529   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatEqual_MPIAIJ"
1535 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1536 {
1537   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1538   Mat            a,b,c,d;
1539   PetscTruth     flg;
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   a = matA->A; b = matA->B;
1544   c = matB->A; d = matB->B;
1545 
1546   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1547   if (flg) {
1548     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1549   }
1550   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatCopy_MPIAIJ"
1556 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1557 {
1558   PetscErrorCode ierr;
1559   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1560   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1561 
1562   PetscFunctionBegin;
1563   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1564   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1565     /* because of the column compression in the off-processor part of the matrix a->B,
1566        the number of columns in a->B and b->B may be different, hence we cannot call
1567        the MatCopy() directly on the two parts. If need be, we can provide a more
1568        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1569        then copying the submatrices */
1570     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1571   } else {
1572     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1573     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1580 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1581 {
1582   PetscErrorCode ierr;
1583 
1584   PetscFunctionBegin;
1585   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #include "petscblaslapack.h"
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatAXPY_MPIAIJ"
1592 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1593 {
1594   PetscErrorCode ierr;
1595   PetscInt       i;
1596   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1597   PetscBLASInt   bnz,one=1;
1598   Mat_SeqAIJ     *x,*y;
1599 
1600   PetscFunctionBegin;
1601   if (str == SAME_NONZERO_PATTERN) {
1602     PetscScalar alpha = a;
1603     x = (Mat_SeqAIJ *)xx->A->data;
1604     y = (Mat_SeqAIJ *)yy->A->data;
1605     bnz = (PetscBLASInt)x->nz;
1606     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1607     x = (Mat_SeqAIJ *)xx->B->data;
1608     y = (Mat_SeqAIJ *)yy->B->data;
1609     bnz = (PetscBLASInt)x->nz;
1610     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1611   } else if (str == SUBSET_NONZERO_PATTERN) {
1612     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1613 
1614     x = (Mat_SeqAIJ *)xx->B->data;
1615     y = (Mat_SeqAIJ *)yy->B->data;
1616     if (y->xtoy && y->XtoY != xx->B) {
1617       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1618       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1619     }
1620     if (!y->xtoy) { /* get xtoy */
1621       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1622       y->XtoY = xx->B;
1623       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1624     }
1625     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1626   } else {
1627     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1628   }
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatConjugate_MPIAIJ"
1636 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1637 {
1638 #if defined(PETSC_USE_COMPLEX)
1639   PetscErrorCode ierr;
1640   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1641 
1642   PetscFunctionBegin;
1643   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1644   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1645 #else
1646   PetscFunctionBegin;
1647 #endif
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatRealPart_MPIAIJ"
1653 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1654 {
1655   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1656   PetscErrorCode ierr;
1657 
1658   PetscFunctionBegin;
1659   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1660   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1666 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1667 {
1668   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1669   PetscErrorCode ierr;
1670 
1671   PetscFunctionBegin;
1672   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1673   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #ifdef PETSC_HAVE_PBGL
1678 
1679 #include <boost/parallel/mpi/bsp_process_group.hpp>
1680 #include <boost/graph/distributed/ilu_default_graph.hpp>
1681 #include <boost/graph/distributed/ilu_0_block.hpp>
1682 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1683 #include <boost/graph/distributed/petsc/interface.hpp>
1684 #include <boost/multi_array.hpp>
1685 #include <boost/parallel/distributed_property_map.hpp>
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1689 /*
1690   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1691 */
1692 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1693 {
1694   namespace petsc = boost::distributed::petsc;
1695 
1696   namespace graph_dist = boost::graph::distributed;
1697   using boost::graph::distributed::ilu_default::process_group_type;
1698   using boost::graph::ilu_permuted;
1699 
1700   PetscTruth      row_identity, col_identity;
1701   PetscContainer  c;
1702   PetscInt        m, n, M, N;
1703   PetscErrorCode  ierr;
1704 
1705   PetscFunctionBegin;
1706   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1707   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1708   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1709   if (!row_identity || !col_identity) {
1710     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1711   }
1712 
1713   process_group_type pg;
1714   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1715   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1716   lgraph_type&   level_graph = *lgraph_p;
1717   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1718 
1719   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1720   ilu_permuted(level_graph);
1721 
1722   /* put together the new matrix */
1723   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1724   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1725   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1726   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1727   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1728   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1729   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1730   (*fact)->factor = FACTOR_LU;
1731 
1732   ierr = PetscContainerCreate(A->comm, &c);
1733   ierr = PetscContainerSetPointer(c, lgraph_p);
1734   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1740 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1741 {
1742   PetscFunctionBegin;
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatSolve_MPIAIJ"
1748 /*
1749   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1750 */
1751 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1752 {
1753   namespace graph_dist = boost::graph::distributed;
1754 
1755   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1756   lgraph_type*   lgraph_p;
1757   PetscContainer c;
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1762   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1763   ierr = VecCopy(b, x); CHKERRQ(ierr);
1764 
1765   PetscScalar* array_x;
1766   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1767   PetscInt sx;
1768   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1769 
1770   PetscScalar* array_b;
1771   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1772   PetscInt sb;
1773   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1774 
1775   lgraph_type&   level_graph = *lgraph_p;
1776   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1777 
1778   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1779   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1780                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1781 
1782   typedef boost::iterator_property_map<array_ref_type::iterator,
1783                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1784   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1785                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1786 
1787   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1788 
1789   PetscFunctionReturn(0);
1790 }
1791 #endif
1792 
1793 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1794   PetscInt       nzlocal,nsends,nrecvs;
1795   PetscMPIInt    *send_rank;
1796   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1797   PetscScalar    *sbuf_a,**rbuf_a;
1798   PetscErrorCode (*MatDestroy)(Mat);
1799 } Mat_Redundant;
1800 
1801 #undef __FUNCT__
1802 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1803 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1804 {
1805   PetscErrorCode       ierr;
1806   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1807   PetscInt             i;
1808 
1809   PetscFunctionBegin;
1810   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1811   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1812   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1813   for (i=0; i<redund->nrecvs; i++){
1814     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1815     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1816   }
1817   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1818   ierr = PetscFree(redund);CHKERRQ(ierr);
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatDestroy_MatRedundant"
1824 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1825 {
1826   PetscErrorCode  ierr;
1827   PetscContainer  container;
1828   Mat_Redundant   *redund=PETSC_NULL;
1829 
1830   PetscFunctionBegin;
1831   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1832   if (container) {
1833     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1834   } else {
1835     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1836   }
1837   A->ops->destroy = redund->MatDestroy;
1838   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1839   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1840   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1846 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1847 {
1848   PetscMPIInt    rank,size;
1849   MPI_Comm       comm=mat->comm;
1850   PetscErrorCode ierr;
1851   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1852   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1853   PetscInt       *rowrange=mat->rmap.range;
1854   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1855   Mat            A=aij->A,B=aij->B,C=*matredundant;
1856   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1857   PetscScalar    *sbuf_a;
1858   PetscInt       nzlocal=a->nz+b->nz;
1859   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1860   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1861   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1862   PetscScalar    *vals,*aworkA,*aworkB;
1863   PetscMPIInt    tag1,tag2,tag3,imdex;
1864   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1865                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1866   MPI_Status     recv_status,*send_status;
1867   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1868   PetscInt       **rbuf_j=PETSC_NULL;
1869   PetscScalar    **rbuf_a=PETSC_NULL;
1870   Mat_Redundant  *redund=PETSC_NULL;
1871   PetscContainer container;
1872 
1873   PetscFunctionBegin;
1874   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1875   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1876 
1877   if (reuse == MAT_REUSE_MATRIX) {
1878     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1879     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1880     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1881     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1882     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1883     if (container) {
1884       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1885     } else {
1886       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1887     }
1888     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1889 
1890     nsends    = redund->nsends;
1891     nrecvs    = redund->nrecvs;
1892     send_rank = redund->send_rank; recv_rank = send_rank + size;
1893     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1894     sbuf_j    = redund->sbuf_j;
1895     sbuf_a    = redund->sbuf_a;
1896     rbuf_j    = redund->rbuf_j;
1897     rbuf_a    = redund->rbuf_a;
1898   }
1899 
1900   if (reuse == MAT_INITIAL_MATRIX){
1901     PetscMPIInt  subrank,subsize;
1902     PetscInt     nleftover,np_subcomm;
1903     /* get the destination processors' id send_rank, nsends and nrecvs */
1904     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1905     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1906     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1907     recv_rank = send_rank + size;
1908     np_subcomm = size/nsubcomm;
1909     nleftover  = size - nsubcomm*np_subcomm;
1910     nsends = 0; nrecvs = 0;
1911     for (i=0; i<size; i++){ /* i=rank*/
1912       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1913         send_rank[nsends] = i; nsends++;
1914         recv_rank[nrecvs++] = i;
1915       }
1916     }
1917     if (rank >= size - nleftover){/* this proc is a leftover processor */
1918       i = size-nleftover-1;
1919       j = 0;
1920       while (j < nsubcomm - nleftover){
1921         send_rank[nsends++] = i;
1922         i--; j++;
1923       }
1924     }
1925 
1926     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1927       for (i=0; i<nleftover; i++){
1928         recv_rank[nrecvs++] = size-nleftover+i;
1929       }
1930     }
1931 
1932     /* allocate sbuf_j, sbuf_a */
1933     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1934     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1935     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1936   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1937 
1938   /* copy mat's local entries into the buffers */
1939   if (reuse == MAT_INITIAL_MATRIX){
1940     rownz_max = 0;
1941     rptr = sbuf_j;
1942     cols = sbuf_j + rend-rstart + 1;
1943     vals = sbuf_a;
1944     rptr[0] = 0;
1945     for (i=0; i<rend-rstart; i++){
1946       row = i + rstart;
1947       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1948       ncols  = nzA + nzB;
1949       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1950       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1951       /* load the column indices for this row into cols */
1952       lwrite = 0;
1953       for (l=0; l<nzB; l++) {
1954         if ((ctmp = bmap[cworkB[l]]) < cstart){
1955           vals[lwrite]   = aworkB[l];
1956           cols[lwrite++] = ctmp;
1957         }
1958       }
1959       for (l=0; l<nzA; l++){
1960         vals[lwrite]   = aworkA[l];
1961         cols[lwrite++] = cstart + cworkA[l];
1962       }
1963       for (l=0; l<nzB; l++) {
1964         if ((ctmp = bmap[cworkB[l]]) >= cend){
1965           vals[lwrite]   = aworkB[l];
1966           cols[lwrite++] = ctmp;
1967         }
1968       }
1969       vals += ncols;
1970       cols += ncols;
1971       rptr[i+1] = rptr[i] + ncols;
1972       if (rownz_max < ncols) rownz_max = ncols;
1973     }
1974     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
1975   } else { /* only copy matrix values into sbuf_a */
1976     rptr = sbuf_j;
1977     vals = sbuf_a;
1978     rptr[0] = 0;
1979     for (i=0; i<rend-rstart; i++){
1980       row = i + rstart;
1981       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1982       ncols  = nzA + nzB;
1983       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1984       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1985       lwrite = 0;
1986       for (l=0; l<nzB; l++) {
1987         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1988       }
1989       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1990       for (l=0; l<nzB; l++) {
1991         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1992       }
1993       vals += ncols;
1994       rptr[i+1] = rptr[i] + ncols;
1995     }
1996   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1997 
1998   /* send nzlocal to others, and recv other's nzlocal */
1999   /*--------------------------------------------------*/
2000   if (reuse == MAT_INITIAL_MATRIX){
2001     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2002     s_waits2 = s_waits3 + nsends;
2003     s_waits1 = s_waits2 + nsends;
2004     r_waits1 = s_waits1 + nsends;
2005     r_waits2 = r_waits1 + nrecvs;
2006     r_waits3 = r_waits2 + nrecvs;
2007   } else {
2008     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2009     r_waits3 = s_waits3 + nsends;
2010   }
2011 
2012   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2013   if (reuse == MAT_INITIAL_MATRIX){
2014     /* get new tags to keep the communication clean */
2015     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2016     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2017     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2018     rbuf_nz = sbuf_nz + nsends;
2019 
2020     /* post receives of other's nzlocal */
2021     for (i=0; i<nrecvs; i++){
2022       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2023     }
2024     /* send nzlocal to others */
2025     for (i=0; i<nsends; i++){
2026       sbuf_nz[i] = nzlocal;
2027       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2028     }
2029     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2030     count = nrecvs;
2031     while (count) {
2032       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2033       recv_rank[imdex] = recv_status.MPI_SOURCE;
2034       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2035       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2036 
2037       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2038       rbuf_nz[imdex] += i + 2;
2039       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2040       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2041       count--;
2042     }
2043     /* wait on sends of nzlocal */
2044     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2045     /* send mat->i,j to others, and recv from other's */
2046     /*------------------------------------------------*/
2047     for (i=0; i<nsends; i++){
2048       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2049       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2050     }
2051     /* wait on receives of mat->i,j */
2052     /*------------------------------*/
2053     count = nrecvs;
2054     while (count) {
2055       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2056       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2057       count--;
2058     }
2059     /* wait on sends of mat->i,j */
2060     /*---------------------------*/
2061     if (nsends) {
2062       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2063     }
2064   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2065 
2066   /* post receives, send and receive mat->a */
2067   /*----------------------------------------*/
2068   for (imdex=0; imdex<nrecvs; imdex++) {
2069     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2070   }
2071   for (i=0; i<nsends; i++){
2072     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2073   }
2074   count = nrecvs;
2075   while (count) {
2076     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2077     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2078     count--;
2079   }
2080   if (nsends) {
2081     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2082   }
2083 
2084   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2085 
2086   /* create redundant matrix */
2087   /*-------------------------*/
2088   if (reuse == MAT_INITIAL_MATRIX){
2089     /* compute rownz_max for preallocation */
2090     for (imdex=0; imdex<nrecvs; imdex++){
2091       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2092       rptr = rbuf_j[imdex];
2093       for (i=0; i<j; i++){
2094         ncols = rptr[i+1] - rptr[i];
2095         if (rownz_max < ncols) rownz_max = ncols;
2096       }
2097     }
2098 
2099     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2100     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2101     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2102     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2103     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2104   } else {
2105     C = *matredundant;
2106   }
2107 
2108   /* insert local matrix entries */
2109   rptr = sbuf_j;
2110   cols = sbuf_j + rend-rstart + 1;
2111   vals = sbuf_a;
2112   for (i=0; i<rend-rstart; i++){
2113     row   = i + rstart;
2114     ncols = rptr[i+1] - rptr[i];
2115     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2116     vals += ncols;
2117     cols += ncols;
2118   }
2119   /* insert received matrix entries */
2120   for (imdex=0; imdex<nrecvs; imdex++){
2121     rstart = rowrange[recv_rank[imdex]];
2122     rend   = rowrange[recv_rank[imdex]+1];
2123     rptr = rbuf_j[imdex];
2124     cols = rbuf_j[imdex] + rend-rstart + 1;
2125     vals = rbuf_a[imdex];
2126     for (i=0; i<rend-rstart; i++){
2127       row   = i + rstart;
2128       ncols = rptr[i+1] - rptr[i];
2129       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2130       vals += ncols;
2131       cols += ncols;
2132     }
2133   }
2134   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2137   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2138   if (reuse == MAT_INITIAL_MATRIX){
2139     PetscContainer container;
2140     *matredundant = C;
2141     /* create a supporting struct and attach it to C for reuse */
2142     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2143     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2144     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2145     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2146     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2147 
2148     redund->nzlocal = nzlocal;
2149     redund->nsends  = nsends;
2150     redund->nrecvs  = nrecvs;
2151     redund->send_rank = send_rank;
2152     redund->sbuf_nz = sbuf_nz;
2153     redund->sbuf_j  = sbuf_j;
2154     redund->sbuf_a  = sbuf_a;
2155     redund->rbuf_j  = rbuf_j;
2156     redund->rbuf_a  = rbuf_a;
2157 
2158     redund->MatDestroy = C->ops->destroy;
2159     C->ops->destroy    = MatDestroy_MatRedundant;
2160   }
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 #undef __FUNCT__
2165 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2166 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2167 {
2168   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2169   PetscInt       n      = A->rmap.n;
2170   PetscInt       cstart = A->cmap.rstart;
2171   PetscInt      *cmap   = mat->garray;
2172   PetscInt      *diagIdx, *offdiagIdx;
2173   Vec            diagV, offdiagV;
2174   PetscScalar   *a, *diagA, *offdiagA;
2175   PetscInt       r;
2176   PetscErrorCode ierr;
2177 
2178   PetscFunctionBegin;
2179   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2180   ierr = VecCreateSeq(A->comm, n, &diagV);CHKERRQ(ierr);
2181   ierr = VecCreateSeq(A->comm, n, &offdiagV);CHKERRQ(ierr);
2182   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2183   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2184   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2185   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2186   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2187   for(r = 0; r < n; ++r) {
2188     if (diagA[r] <= offdiagA[r]) {
2189       a[r]   = diagA[r];
2190       idx[r] = cstart + diagIdx[r];
2191     } else {
2192       a[r]   = offdiagA[r];
2193       idx[r] = cmap[offdiagIdx[r]];
2194     }
2195   }
2196   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2197   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2198   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2199   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2200   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2201   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 /* -------------------------------------------------------------------*/
2206 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2207        MatGetRow_MPIAIJ,
2208        MatRestoreRow_MPIAIJ,
2209        MatMult_MPIAIJ,
2210 /* 4*/ MatMultAdd_MPIAIJ,
2211        MatMultTranspose_MPIAIJ,
2212        MatMultTransposeAdd_MPIAIJ,
2213 #ifdef PETSC_HAVE_PBGL
2214        MatSolve_MPIAIJ,
2215 #else
2216        0,
2217 #endif
2218        0,
2219        0,
2220 /*10*/ 0,
2221        0,
2222        0,
2223        MatRelax_MPIAIJ,
2224        MatTranspose_MPIAIJ,
2225 /*15*/ MatGetInfo_MPIAIJ,
2226        MatEqual_MPIAIJ,
2227        MatGetDiagonal_MPIAIJ,
2228        MatDiagonalScale_MPIAIJ,
2229        MatNorm_MPIAIJ,
2230 /*20*/ MatAssemblyBegin_MPIAIJ,
2231        MatAssemblyEnd_MPIAIJ,
2232        0,
2233        MatSetOption_MPIAIJ,
2234        MatZeroEntries_MPIAIJ,
2235 /*25*/ MatZeroRows_MPIAIJ,
2236        0,
2237 #ifdef PETSC_HAVE_PBGL
2238        MatLUFactorNumeric_MPIAIJ,
2239 #else
2240        0,
2241 #endif
2242        0,
2243        0,
2244 /*30*/ MatSetUpPreallocation_MPIAIJ,
2245 #ifdef PETSC_HAVE_PBGL
2246        MatILUFactorSymbolic_MPIAIJ,
2247 #else
2248        0,
2249 #endif
2250        0,
2251        0,
2252        0,
2253 /*35*/ MatDuplicate_MPIAIJ,
2254        0,
2255        0,
2256        0,
2257        0,
2258 /*40*/ MatAXPY_MPIAIJ,
2259        MatGetSubMatrices_MPIAIJ,
2260        MatIncreaseOverlap_MPIAIJ,
2261        MatGetValues_MPIAIJ,
2262        MatCopy_MPIAIJ,
2263 /*45*/ 0,
2264        MatScale_MPIAIJ,
2265        0,
2266        0,
2267        0,
2268 /*50*/ MatSetBlockSize_MPIAIJ,
2269        0,
2270        0,
2271        0,
2272        0,
2273 /*55*/ MatFDColoringCreate_MPIAIJ,
2274        0,
2275        MatSetUnfactored_MPIAIJ,
2276        MatPermute_MPIAIJ,
2277        0,
2278 /*60*/ MatGetSubMatrix_MPIAIJ,
2279        MatDestroy_MPIAIJ,
2280        MatView_MPIAIJ,
2281        0,
2282        0,
2283 /*65*/ 0,
2284        0,
2285        0,
2286        0,
2287        0,
2288 /*70*/ 0,
2289        0,
2290        MatSetColoring_MPIAIJ,
2291 #if defined(PETSC_HAVE_ADIC)
2292        MatSetValuesAdic_MPIAIJ,
2293 #else
2294        0,
2295 #endif
2296        MatSetValuesAdifor_MPIAIJ,
2297 /*75*/ 0,
2298        0,
2299        0,
2300        0,
2301        0,
2302 /*80*/ 0,
2303        0,
2304        0,
2305        0,
2306 /*84*/ MatLoad_MPIAIJ,
2307        0,
2308        0,
2309        0,
2310        0,
2311        0,
2312 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2313        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2314        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2315        MatPtAP_Basic,
2316        MatPtAPSymbolic_MPIAIJ,
2317 /*95*/ MatPtAPNumeric_MPIAIJ,
2318        0,
2319        0,
2320        0,
2321        0,
2322 /*100*/0,
2323        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2324        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2325        MatConjugate_MPIAIJ,
2326        0,
2327 /*105*/MatSetValuesRow_MPIAIJ,
2328        MatRealPart_MPIAIJ,
2329        MatImaginaryPart_MPIAIJ,
2330        0,
2331        0,
2332 /*110*/0,
2333        MatGetRedundantMatrix_MPIAIJ,
2334        MatGetRowMin_MPIAIJ};
2335 
2336 /* ----------------------------------------------------------------------------------------*/
2337 
2338 EXTERN_C_BEGIN
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2341 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2342 {
2343   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2344   PetscErrorCode ierr;
2345 
2346   PetscFunctionBegin;
2347   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2348   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 EXTERN_C_END
2352 
2353 EXTERN_C_BEGIN
2354 #undef __FUNCT__
2355 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2356 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2357 {
2358   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2359   PetscErrorCode ierr;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2363   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2364   PetscFunctionReturn(0);
2365 }
2366 EXTERN_C_END
2367 
2368 #include "petscpc.h"
2369 EXTERN_C_BEGIN
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2372 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2373 {
2374   Mat_MPIAIJ     *b;
2375   PetscErrorCode ierr;
2376   PetscInt       i;
2377 
2378   PetscFunctionBegin;
2379   B->preallocated = PETSC_TRUE;
2380   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2381   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2382   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2383   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2384 
2385   B->rmap.bs = B->cmap.bs = 1;
2386   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2387   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2388   if (d_nnz) {
2389     for (i=0; i<B->rmap.n; i++) {
2390       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2391     }
2392   }
2393   if (o_nnz) {
2394     for (i=0; i<B->rmap.n; i++) {
2395       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2396     }
2397   }
2398   b = (Mat_MPIAIJ*)B->data;
2399 
2400   /* Explicitly create 2 MATSEQAIJ matrices. */
2401   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2402   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2403   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2404   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2405   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2406   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2407   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2408   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2409 
2410   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2411   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2412 
2413   PetscFunctionReturn(0);
2414 }
2415 EXTERN_C_END
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2419 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2420 {
2421   Mat            mat;
2422   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2423   PetscErrorCode ierr;
2424 
2425   PetscFunctionBegin;
2426   *newmat       = 0;
2427   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2428   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2429   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2430   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2431   a    = (Mat_MPIAIJ*)mat->data;
2432 
2433   mat->factor       = matin->factor;
2434   mat->rmap.bs      = matin->rmap.bs;
2435   mat->assembled    = PETSC_TRUE;
2436   mat->insertmode   = NOT_SET_VALUES;
2437   mat->preallocated = PETSC_TRUE;
2438 
2439   a->size           = oldmat->size;
2440   a->rank           = oldmat->rank;
2441   a->donotstash     = oldmat->donotstash;
2442   a->roworiented    = oldmat->roworiented;
2443   a->rowindices     = 0;
2444   a->rowvalues      = 0;
2445   a->getrowactive   = PETSC_FALSE;
2446 
2447   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2448   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2449 
2450   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2451   if (oldmat->colmap) {
2452 #if defined (PETSC_USE_CTABLE)
2453     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2454 #else
2455     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2456     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2457     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2458 #endif
2459   } else a->colmap = 0;
2460   if (oldmat->garray) {
2461     PetscInt len;
2462     len  = oldmat->B->cmap.n;
2463     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2464     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2465     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2466   } else a->garray = 0;
2467 
2468   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2469   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2470   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2471   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2472   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2473   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2474   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2475   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2476   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2477   *newmat = mat;
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #include "petscsys.h"
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatLoad_MPIAIJ"
2485 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2486 {
2487   Mat            A;
2488   PetscScalar    *vals,*svals;
2489   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2490   MPI_Status     status;
2491   PetscErrorCode ierr;
2492   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2493   PetscInt       i,nz,j,rstart,rend,mmax;
2494   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2495   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2496   PetscInt       cend,cstart,n,*rowners;
2497   int            fd;
2498 
2499   PetscFunctionBegin;
2500   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2501   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2502   if (!rank) {
2503     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2504     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2505     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2506   }
2507 
2508   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2509   M = header[1]; N = header[2];
2510   /* determine ownership of all rows */
2511   m    = M/size + ((M % size) > rank);
2512   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2513   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2514 
2515   /* First process needs enough room for process with most rows */
2516   if (!rank) {
2517     mmax       = rowners[1];
2518     for (i=2; i<size; i++) {
2519       mmax = PetscMax(mmax,rowners[i]);
2520     }
2521   } else mmax = m;
2522 
2523   rowners[0] = 0;
2524   for (i=2; i<=size; i++) {
2525     rowners[i] += rowners[i-1];
2526   }
2527   rstart = rowners[rank];
2528   rend   = rowners[rank+1];
2529 
2530   /* distribute row lengths to all processors */
2531   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2532   if (!rank) {
2533     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2534     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2535     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2536     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2537     for (j=0; j<m; j++) {
2538       procsnz[0] += ourlens[j];
2539     }
2540     for (i=1; i<size; i++) {
2541       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2542       /* calculate the number of nonzeros on each processor */
2543       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2544         procsnz[i] += rowlengths[j];
2545       }
2546       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2547     }
2548     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2549   } else {
2550     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2551   }
2552 
2553   if (!rank) {
2554     /* determine max buffer needed and allocate it */
2555     maxnz = 0;
2556     for (i=0; i<size; i++) {
2557       maxnz = PetscMax(maxnz,procsnz[i]);
2558     }
2559     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2560 
2561     /* read in my part of the matrix column indices  */
2562     nz   = procsnz[0];
2563     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2564     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2565 
2566     /* read in every one elses and ship off */
2567     for (i=1; i<size; i++) {
2568       nz   = procsnz[i];
2569       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2570       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2571     }
2572     ierr = PetscFree(cols);CHKERRQ(ierr);
2573   } else {
2574     /* determine buffer space needed for message */
2575     nz = 0;
2576     for (i=0; i<m; i++) {
2577       nz += ourlens[i];
2578     }
2579     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2580 
2581     /* receive message of column indices*/
2582     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2583     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2584     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2585   }
2586 
2587   /* determine column ownership if matrix is not square */
2588   if (N != M) {
2589     n      = N/size + ((N % size) > rank);
2590     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2591     cstart = cend - n;
2592   } else {
2593     cstart = rstart;
2594     cend   = rend;
2595     n      = cend - cstart;
2596   }
2597 
2598   /* loop over local rows, determining number of off diagonal entries */
2599   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2600   jj = 0;
2601   for (i=0; i<m; i++) {
2602     for (j=0; j<ourlens[i]; j++) {
2603       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2604       jj++;
2605     }
2606   }
2607 
2608   /* create our matrix */
2609   for (i=0; i<m; i++) {
2610     ourlens[i] -= offlens[i];
2611   }
2612   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2613   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2614   ierr = MatSetType(A,type);CHKERRQ(ierr);
2615   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2616 
2617   for (i=0; i<m; i++) {
2618     ourlens[i] += offlens[i];
2619   }
2620 
2621   if (!rank) {
2622     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2623 
2624     /* read in my part of the matrix numerical values  */
2625     nz   = procsnz[0];
2626     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2627 
2628     /* insert into matrix */
2629     jj      = rstart;
2630     smycols = mycols;
2631     svals   = vals;
2632     for (i=0; i<m; i++) {
2633       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2634       smycols += ourlens[i];
2635       svals   += ourlens[i];
2636       jj++;
2637     }
2638 
2639     /* read in other processors and ship out */
2640     for (i=1; i<size; i++) {
2641       nz   = procsnz[i];
2642       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2643       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2644     }
2645     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2646   } else {
2647     /* receive numeric values */
2648     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2649 
2650     /* receive message of values*/
2651     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2652     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2653     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2654 
2655     /* insert into matrix */
2656     jj      = rstart;
2657     smycols = mycols;
2658     svals   = vals;
2659     for (i=0; i<m; i++) {
2660       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2661       smycols += ourlens[i];
2662       svals   += ourlens[i];
2663       jj++;
2664     }
2665   }
2666   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2667   ierr = PetscFree(vals);CHKERRQ(ierr);
2668   ierr = PetscFree(mycols);CHKERRQ(ierr);
2669   ierr = PetscFree(rowners);CHKERRQ(ierr);
2670 
2671   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2672   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2673   *newmat = A;
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2679 /*
2680     Not great since it makes two copies of the submatrix, first an SeqAIJ
2681   in local and then by concatenating the local matrices the end result.
2682   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2683 */
2684 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2685 {
2686   PetscErrorCode ierr;
2687   PetscMPIInt    rank,size;
2688   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2689   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2690   Mat            *local,M,Mreuse;
2691   PetscScalar    *vwork,*aa;
2692   MPI_Comm       comm = mat->comm;
2693   Mat_SeqAIJ     *aij;
2694 
2695 
2696   PetscFunctionBegin;
2697   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2698   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2699 
2700   if (call ==  MAT_REUSE_MATRIX) {
2701     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2702     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2703     local = &Mreuse;
2704     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2705   } else {
2706     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2707     Mreuse = *local;
2708     ierr   = PetscFree(local);CHKERRQ(ierr);
2709   }
2710 
2711   /*
2712       m - number of local rows
2713       n - number of columns (same on all processors)
2714       rstart - first row in new global matrix generated
2715   */
2716   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2717   if (call == MAT_INITIAL_MATRIX) {
2718     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2719     ii  = aij->i;
2720     jj  = aij->j;
2721 
2722     /*
2723         Determine the number of non-zeros in the diagonal and off-diagonal
2724         portions of the matrix in order to do correct preallocation
2725     */
2726 
2727     /* first get start and end of "diagonal" columns */
2728     if (csize == PETSC_DECIDE) {
2729       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2730       if (mglobal == n) { /* square matrix */
2731 	nlocal = m;
2732       } else {
2733         nlocal = n/size + ((n % size) > rank);
2734       }
2735     } else {
2736       nlocal = csize;
2737     }
2738     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2739     rstart = rend - nlocal;
2740     if (rank == size - 1 && rend != n) {
2741       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2742     }
2743 
2744     /* next, compute all the lengths */
2745     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2746     olens = dlens + m;
2747     for (i=0; i<m; i++) {
2748       jend = ii[i+1] - ii[i];
2749       olen = 0;
2750       dlen = 0;
2751       for (j=0; j<jend; j++) {
2752         if (*jj < rstart || *jj >= rend) olen++;
2753         else dlen++;
2754         jj++;
2755       }
2756       olens[i] = olen;
2757       dlens[i] = dlen;
2758     }
2759     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2760     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2761     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2762     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2763     ierr = PetscFree(dlens);CHKERRQ(ierr);
2764   } else {
2765     PetscInt ml,nl;
2766 
2767     M = *newmat;
2768     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2769     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2770     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2771     /*
2772          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2773        rather than the slower MatSetValues().
2774     */
2775     M->was_assembled = PETSC_TRUE;
2776     M->assembled     = PETSC_FALSE;
2777   }
2778   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2779   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2780   ii  = aij->i;
2781   jj  = aij->j;
2782   aa  = aij->a;
2783   for (i=0; i<m; i++) {
2784     row   = rstart + i;
2785     nz    = ii[i+1] - ii[i];
2786     cwork = jj;     jj += nz;
2787     vwork = aa;     aa += nz;
2788     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2789   }
2790 
2791   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2792   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2793   *newmat = M;
2794 
2795   /* save submatrix used in processor for next request */
2796   if (call ==  MAT_INITIAL_MATRIX) {
2797     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2798     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2799   }
2800 
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 EXTERN_C_BEGIN
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2807 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2808 {
2809   PetscInt       m,cstart, cend,j,nnz,i,d;
2810   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2811   const PetscInt *JJ;
2812   PetscScalar    *values;
2813   PetscErrorCode ierr;
2814 
2815   PetscFunctionBegin;
2816   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2817 
2818   B->rmap.bs = B->cmap.bs = 1;
2819   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2820   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2821   m      = B->rmap.n;
2822   cstart = B->cmap.rstart;
2823   cend   = B->cmap.rend;
2824   rstart = B->rmap.rstart;
2825 
2826   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2827   o_nnz = d_nnz + m;
2828 
2829   for (i=0; i<m; i++) {
2830     nnz     = Ii[i+1]- Ii[i];
2831     JJ      = J + Ii[i];
2832     nnz_max = PetscMax(nnz_max,nnz);
2833     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2834     for (j=0; j<nnz; j++) {
2835       if (*JJ >= cstart) break;
2836       JJ++;
2837     }
2838     d = 0;
2839     for (; j<nnz; j++) {
2840       if (*JJ++ >= cend) break;
2841       d++;
2842     }
2843     d_nnz[i] = d;
2844     o_nnz[i] = nnz - d;
2845   }
2846   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2847   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2848 
2849   if (v) values = (PetscScalar*)v;
2850   else {
2851     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2852     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2853   }
2854 
2855   for (i=0; i<m; i++) {
2856     ii   = i + rstart;
2857     nnz  = Ii[i+1]- Ii[i];
2858     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2859   }
2860   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2861   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862 
2863   if (!v) {
2864     ierr = PetscFree(values);CHKERRQ(ierr);
2865   }
2866   PetscFunctionReturn(0);
2867 }
2868 EXTERN_C_END
2869 
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2872 /*@
2873    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2874    (the default parallel PETSc format).
2875 
2876    Collective on MPI_Comm
2877 
2878    Input Parameters:
2879 +  B - the matrix
2880 .  i - the indices into j for the start of each local row (starts with zero)
2881 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2882 -  v - optional values in the matrix
2883 
2884    Level: developer
2885 
2886    Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal
2887      storage format. Thus changing the values in a[] after this call will not effect the matrix values.
2888 
2889 .keywords: matrix, aij, compressed row, sparse, parallel
2890 
2891 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2892           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2893 @*/
2894 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2895 {
2896   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2897 
2898   PetscFunctionBegin;
2899   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2900   if (f) {
2901     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2902   }
2903   PetscFunctionReturn(0);
2904 }
2905 
2906 #undef __FUNCT__
2907 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2908 /*@C
2909    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2910    (the default parallel PETSc format).  For good matrix assembly performance
2911    the user should preallocate the matrix storage by setting the parameters
2912    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2913    performance can be increased by more than a factor of 50.
2914 
2915    Collective on MPI_Comm
2916 
2917    Input Parameters:
2918 +  A - the matrix
2919 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2920            (same value is used for all local rows)
2921 .  d_nnz - array containing the number of nonzeros in the various rows of the
2922            DIAGONAL portion of the local submatrix (possibly different for each row)
2923            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2924            The size of this array is equal to the number of local rows, i.e 'm'.
2925            You must leave room for the diagonal entry even if it is zero.
2926 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2927            submatrix (same value is used for all local rows).
2928 -  o_nnz - array containing the number of nonzeros in the various rows of the
2929            OFF-DIAGONAL portion of the local submatrix (possibly different for
2930            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2931            structure. The size of this array is equal to the number
2932            of local rows, i.e 'm'.
2933 
2934    If the *_nnz parameter is given then the *_nz parameter is ignored
2935 
2936    The AIJ format (also called the Yale sparse matrix format or
2937    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2938    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2939 
2940    The parallel matrix is partitioned such that the first m0 rows belong to
2941    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2942    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2943 
2944    The DIAGONAL portion of the local submatrix of a processor can be defined
2945    as the submatrix which is obtained by extraction the part corresponding
2946    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2947    first row that belongs to the processor, and r2 is the last row belonging
2948    to the this processor. This is a square mxm matrix. The remaining portion
2949    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2950 
2951    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2952 
2953    Example usage:
2954 
2955    Consider the following 8x8 matrix with 34 non-zero values, that is
2956    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2957    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2958    as follows:
2959 
2960 .vb
2961             1  2  0  |  0  3  0  |  0  4
2962     Proc0   0  5  6  |  7  0  0  |  8  0
2963             9  0 10  | 11  0  0  | 12  0
2964     -------------------------------------
2965            13  0 14  | 15 16 17  |  0  0
2966     Proc1   0 18  0  | 19 20 21  |  0  0
2967             0  0  0  | 22 23  0  | 24  0
2968     -------------------------------------
2969     Proc2  25 26 27  |  0  0 28  | 29  0
2970            30  0  0  | 31 32 33  |  0 34
2971 .ve
2972 
2973    This can be represented as a collection of submatrices as:
2974 
2975 .vb
2976       A B C
2977       D E F
2978       G H I
2979 .ve
2980 
2981    Where the submatrices A,B,C are owned by proc0, D,E,F are
2982    owned by proc1, G,H,I are owned by proc2.
2983 
2984    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2985    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2986    The 'M','N' parameters are 8,8, and have the same values on all procs.
2987 
2988    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2989    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2990    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2991    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2992    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2993    matrix, ans [DF] as another SeqAIJ matrix.
2994 
2995    When d_nz, o_nz parameters are specified, d_nz storage elements are
2996    allocated for every row of the local diagonal submatrix, and o_nz
2997    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2998    One way to choose d_nz and o_nz is to use the max nonzerors per local
2999    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3000    In this case, the values of d_nz,o_nz are:
3001 .vb
3002      proc0 : dnz = 2, o_nz = 2
3003      proc1 : dnz = 3, o_nz = 2
3004      proc2 : dnz = 1, o_nz = 4
3005 .ve
3006    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3007    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3008    for proc3. i.e we are using 12+15+10=37 storage locations to store
3009    34 values.
3010 
3011    When d_nnz, o_nnz parameters are specified, the storage is specified
3012    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3013    In the above case the values for d_nnz,o_nnz are:
3014 .vb
3015      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3016      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3017      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3018 .ve
3019    Here the space allocated is sum of all the above values i.e 34, and
3020    hence pre-allocation is perfect.
3021 
3022    Level: intermediate
3023 
3024 .keywords: matrix, aij, compressed row, sparse, parallel
3025 
3026 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3027           MPIAIJ
3028 @*/
3029 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3030 {
3031   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3032 
3033   PetscFunctionBegin;
3034   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3035   if (f) {
3036     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3037   }
3038   PetscFunctionReturn(0);
3039 }
3040 
3041 #undef __FUNCT__
3042 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3043 /*@C
3044      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3045          CSR format the local rows.
3046 
3047    Collective on MPI_Comm
3048 
3049    Input Parameters:
3050 +  comm - MPI communicator
3051 .  m - number of local rows (Cannot be PETSC_DECIDE)
3052 .  n - This value should be the same as the local size used in creating the
3053        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3054        calculated if N is given) For square matrices n is almost always m.
3055 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3056 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3057 .   i - row indices
3058 .   j - column indices
3059 -   a - matrix values
3060 
3061    Output Parameter:
3062 .   mat - the matrix
3063 
3064    Level: intermediate
3065 
3066    Notes:
3067        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3068      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3069      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3070 
3071        The i and j indices are 0 based
3072 
3073 .keywords: matrix, aij, compressed row, sparse, parallel
3074 
3075 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3076           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3077 @*/
3078 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3079 {
3080   PetscErrorCode ierr;
3081 
3082  PetscFunctionBegin;
3083   if (i[0]) {
3084     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3085   }
3086   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3087   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3088   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3089   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3090   PetscFunctionReturn(0);
3091 }
3092 
3093 #undef __FUNCT__
3094 #define __FUNCT__ "MatCreateMPIAIJ"
3095 /*@C
3096    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3097    (the default parallel PETSc format).  For good matrix assembly performance
3098    the user should preallocate the matrix storage by setting the parameters
3099    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3100    performance can be increased by more than a factor of 50.
3101 
3102    Collective on MPI_Comm
3103 
3104    Input Parameters:
3105 +  comm - MPI communicator
3106 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3107            This value should be the same as the local size used in creating the
3108            y vector for the matrix-vector product y = Ax.
3109 .  n - This value should be the same as the local size used in creating the
3110        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3111        calculated if N is given) For square matrices n is almost always m.
3112 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3113 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3114 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3115            (same value is used for all local rows)
3116 .  d_nnz - array containing the number of nonzeros in the various rows of the
3117            DIAGONAL portion of the local submatrix (possibly different for each row)
3118            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3119            The size of this array is equal to the number of local rows, i.e 'm'.
3120            You must leave room for the diagonal entry even if it is zero.
3121 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3122            submatrix (same value is used for all local rows).
3123 -  o_nnz - array containing the number of nonzeros in the various rows of the
3124            OFF-DIAGONAL portion of the local submatrix (possibly different for
3125            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3126            structure. The size of this array is equal to the number
3127            of local rows, i.e 'm'.
3128 
3129    Output Parameter:
3130 .  A - the matrix
3131 
3132    Notes:
3133    If the *_nnz parameter is given then the *_nz parameter is ignored
3134 
3135    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3136    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3137    storage requirements for this matrix.
3138 
3139    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3140    processor than it must be used on all processors that share the object for
3141    that argument.
3142 
3143    The user MUST specify either the local or global matrix dimensions
3144    (possibly both).
3145 
3146    The parallel matrix is partitioned across processors such that the
3147    first m0 rows belong to process 0, the next m1 rows belong to
3148    process 1, the next m2 rows belong to process 2 etc.. where
3149    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3150    values corresponding to [m x N] submatrix.
3151 
3152    The columns are logically partitioned with the n0 columns belonging
3153    to 0th partition, the next n1 columns belonging to the next
3154    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3155 
3156    The DIAGONAL portion of the local submatrix on any given processor
3157    is the submatrix corresponding to the rows and columns m,n
3158    corresponding to the given processor. i.e diagonal matrix on
3159    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3160    etc. The remaining portion of the local submatrix [m x (N-n)]
3161    constitute the OFF-DIAGONAL portion. The example below better
3162    illustrates this concept.
3163 
3164    For a square global matrix we define each processor's diagonal portion
3165    to be its local rows and the corresponding columns (a square submatrix);
3166    each processor's off-diagonal portion encompasses the remainder of the
3167    local matrix (a rectangular submatrix).
3168 
3169    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3170 
3171    When calling this routine with a single process communicator, a matrix of
3172    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3173    type of communicator, use the construction mechanism:
3174      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3175 
3176    By default, this format uses inodes (identical nodes) when possible.
3177    We search for consecutive rows with the same nonzero structure, thereby
3178    reusing matrix information to achieve increased efficiency.
3179 
3180    Options Database Keys:
3181 +  -mat_no_inode  - Do not use inodes
3182 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3183 -  -mat_aij_oneindex - Internally use indexing starting at 1
3184         rather than 0.  Note that when calling MatSetValues(),
3185         the user still MUST index entries starting at 0!
3186 
3187 
3188    Example usage:
3189 
3190    Consider the following 8x8 matrix with 34 non-zero values, that is
3191    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3192    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3193    as follows:
3194 
3195 .vb
3196             1  2  0  |  0  3  0  |  0  4
3197     Proc0   0  5  6  |  7  0  0  |  8  0
3198             9  0 10  | 11  0  0  | 12  0
3199     -------------------------------------
3200            13  0 14  | 15 16 17  |  0  0
3201     Proc1   0 18  0  | 19 20 21  |  0  0
3202             0  0  0  | 22 23  0  | 24  0
3203     -------------------------------------
3204     Proc2  25 26 27  |  0  0 28  | 29  0
3205            30  0  0  | 31 32 33  |  0 34
3206 .ve
3207 
3208    This can be represented as a collection of submatrices as:
3209 
3210 .vb
3211       A B C
3212       D E F
3213       G H I
3214 .ve
3215 
3216    Where the submatrices A,B,C are owned by proc0, D,E,F are
3217    owned by proc1, G,H,I are owned by proc2.
3218 
3219    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3220    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3221    The 'M','N' parameters are 8,8, and have the same values on all procs.
3222 
3223    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3224    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3225    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3226    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3227    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3228    matrix, ans [DF] as another SeqAIJ matrix.
3229 
3230    When d_nz, o_nz parameters are specified, d_nz storage elements are
3231    allocated for every row of the local diagonal submatrix, and o_nz
3232    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3233    One way to choose d_nz and o_nz is to use the max nonzerors per local
3234    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3235    In this case, the values of d_nz,o_nz are:
3236 .vb
3237      proc0 : dnz = 2, o_nz = 2
3238      proc1 : dnz = 3, o_nz = 2
3239      proc2 : dnz = 1, o_nz = 4
3240 .ve
3241    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3242    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3243    for proc3. i.e we are using 12+15+10=37 storage locations to store
3244    34 values.
3245 
3246    When d_nnz, o_nnz parameters are specified, the storage is specified
3247    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3248    In the above case the values for d_nnz,o_nnz are:
3249 .vb
3250      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3251      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3252      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3253 .ve
3254    Here the space allocated is sum of all the above values i.e 34, and
3255    hence pre-allocation is perfect.
3256 
3257    Level: intermediate
3258 
3259 .keywords: matrix, aij, compressed row, sparse, parallel
3260 
3261 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3262           MPIAIJ, MatCreateMPIAIJWithArrays()
3263 @*/
3264 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3265 {
3266   PetscErrorCode ierr;
3267   PetscMPIInt    size;
3268 
3269   PetscFunctionBegin;
3270   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3271   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3273   if (size > 1) {
3274     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3275     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3276   } else {
3277     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3278     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3279   }
3280   PetscFunctionReturn(0);
3281 }
3282 
3283 #undef __FUNCT__
3284 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3285 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3286 {
3287   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3288 
3289   PetscFunctionBegin;
3290   *Ad     = a->A;
3291   *Ao     = a->B;
3292   *colmap = a->garray;
3293   PetscFunctionReturn(0);
3294 }
3295 
3296 #undef __FUNCT__
3297 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3298 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3299 {
3300   PetscErrorCode ierr;
3301   PetscInt       i;
3302   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3303 
3304   PetscFunctionBegin;
3305   if (coloring->ctype == IS_COLORING_GLOBAL) {
3306     ISColoringValue *allcolors,*colors;
3307     ISColoring      ocoloring;
3308 
3309     /* set coloring for diagonal portion */
3310     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3311 
3312     /* set coloring for off-diagonal portion */
3313     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3314     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3315     for (i=0; i<a->B->cmap.n; i++) {
3316       colors[i] = allcolors[a->garray[i]];
3317     }
3318     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3319     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3320     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3321     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3322   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3323     ISColoringValue *colors;
3324     PetscInt        *larray;
3325     ISColoring      ocoloring;
3326 
3327     /* set coloring for diagonal portion */
3328     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3329     for (i=0; i<a->A->cmap.n; i++) {
3330       larray[i] = i + A->cmap.rstart;
3331     }
3332     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3333     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3334     for (i=0; i<a->A->cmap.n; i++) {
3335       colors[i] = coloring->colors[larray[i]];
3336     }
3337     ierr = PetscFree(larray);CHKERRQ(ierr);
3338     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3339     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3340     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3341 
3342     /* set coloring for off-diagonal portion */
3343     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3344     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3345     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3346     for (i=0; i<a->B->cmap.n; i++) {
3347       colors[i] = coloring->colors[larray[i]];
3348     }
3349     ierr = PetscFree(larray);CHKERRQ(ierr);
3350     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3351     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3352     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3353   } else {
3354     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3355   }
3356 
3357   PetscFunctionReturn(0);
3358 }
3359 
3360 #if defined(PETSC_HAVE_ADIC)
3361 #undef __FUNCT__
3362 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3363 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3364 {
3365   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3366   PetscErrorCode ierr;
3367 
3368   PetscFunctionBegin;
3369   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3370   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3371   PetscFunctionReturn(0);
3372 }
3373 #endif
3374 
3375 #undef __FUNCT__
3376 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3377 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3378 {
3379   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3380   PetscErrorCode ierr;
3381 
3382   PetscFunctionBegin;
3383   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3384   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3385   PetscFunctionReturn(0);
3386 }
3387 
3388 #undef __FUNCT__
3389 #define __FUNCT__ "MatMerge"
3390 /*@C
3391       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3392                  matrices from each processor
3393 
3394     Collective on MPI_Comm
3395 
3396    Input Parameters:
3397 +    comm - the communicators the parallel matrix will live on
3398 .    inmat - the input sequential matrices
3399 .    n - number of local columns (or PETSC_DECIDE)
3400 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3401 
3402    Output Parameter:
3403 .    outmat - the parallel matrix generated
3404 
3405     Level: advanced
3406 
3407    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3408 
3409 @*/
3410 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3411 {
3412   PetscErrorCode ierr;
3413   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3414   PetscInt       *indx;
3415   PetscScalar    *values;
3416 
3417   PetscFunctionBegin;
3418   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3419   if (scall == MAT_INITIAL_MATRIX){
3420     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3421     if (n == PETSC_DECIDE){
3422       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3423     }
3424     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3425     rstart -= m;
3426 
3427     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3428     for (i=0;i<m;i++) {
3429       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3430       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3431       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3432     }
3433     /* This routine will ONLY return MPIAIJ type matrix */
3434     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3435     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3436     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3437     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3438     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3439 
3440   } else if (scall == MAT_REUSE_MATRIX){
3441     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3442   } else {
3443     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3444   }
3445 
3446   for (i=0;i<m;i++) {
3447     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3448     Ii    = i + rstart;
3449     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3450     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3451   }
3452   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3453   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3454   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3455 
3456   PetscFunctionReturn(0);
3457 }
3458 
3459 #undef __FUNCT__
3460 #define __FUNCT__ "MatFileSplit"
3461 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3462 {
3463   PetscErrorCode    ierr;
3464   PetscMPIInt       rank;
3465   PetscInt          m,N,i,rstart,nnz;
3466   size_t            len;
3467   const PetscInt    *indx;
3468   PetscViewer       out;
3469   char              *name;
3470   Mat               B;
3471   const PetscScalar *values;
3472 
3473   PetscFunctionBegin;
3474   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3475   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3476   /* Should this be the type of the diagonal block of A? */
3477   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3478   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3479   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3480   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3481   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3482   for (i=0;i<m;i++) {
3483     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3484     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3485     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3486   }
3487   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3488   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3489 
3490   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3491   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3492   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3493   sprintf(name,"%s.%d",outfile,rank);
3494   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3495   ierr = PetscFree(name);
3496   ierr = MatView(B,out);CHKERRQ(ierr);
3497   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3498   ierr = MatDestroy(B);CHKERRQ(ierr);
3499   PetscFunctionReturn(0);
3500 }
3501 
3502 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3505 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3506 {
3507   PetscErrorCode       ierr;
3508   Mat_Merge_SeqsToMPI  *merge;
3509   PetscContainer       container;
3510 
3511   PetscFunctionBegin;
3512   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3513   if (container) {
3514     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3515     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3516     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3517     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3518     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3519     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3520     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3521     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3522     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3523     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3524     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3525     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3526 
3527     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3528     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3529   }
3530   ierr = PetscFree(merge);CHKERRQ(ierr);
3531 
3532   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3533   PetscFunctionReturn(0);
3534 }
3535 
3536 #include "src/mat/utils/freespace.h"
3537 #include "petscbt.h"
3538 static PetscEvent logkey_seqstompinum = 0;
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3541 /*@C
3542       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3543                  matrices from each processor
3544 
3545     Collective on MPI_Comm
3546 
3547    Input Parameters:
3548 +    comm - the communicators the parallel matrix will live on
3549 .    seqmat - the input sequential matrices
3550 .    m - number of local rows (or PETSC_DECIDE)
3551 .    n - number of local columns (or PETSC_DECIDE)
3552 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3553 
3554    Output Parameter:
3555 .    mpimat - the parallel matrix generated
3556 
3557     Level: advanced
3558 
3559    Notes:
3560      The dimensions of the sequential matrix in each processor MUST be the same.
3561      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3562      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3563 @*/
3564 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3565 {
3566   PetscErrorCode       ierr;
3567   MPI_Comm             comm=mpimat->comm;
3568   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3569   PetscMPIInt          size,rank,taga,*len_s;
3570   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3571   PetscInt             proc,m;
3572   PetscInt             **buf_ri,**buf_rj;
3573   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3574   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3575   MPI_Request          *s_waits,*r_waits;
3576   MPI_Status           *status;
3577   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3578   Mat_Merge_SeqsToMPI  *merge;
3579   PetscContainer       container;
3580 
3581   PetscFunctionBegin;
3582   if (!logkey_seqstompinum) {
3583     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3584   }
3585   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3586 
3587   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3588   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3589 
3590   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3591   if (container) {
3592     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3593   }
3594   bi     = merge->bi;
3595   bj     = merge->bj;
3596   buf_ri = merge->buf_ri;
3597   buf_rj = merge->buf_rj;
3598 
3599   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3600   owners = merge->rowmap.range;
3601   len_s  = merge->len_s;
3602 
3603   /* send and recv matrix values */
3604   /*-----------------------------*/
3605   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3606   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3607 
3608   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3609   for (proc=0,k=0; proc<size; proc++){
3610     if (!len_s[proc]) continue;
3611     i = owners[proc];
3612     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3613     k++;
3614   }
3615 
3616   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3617   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3618   ierr = PetscFree(status);CHKERRQ(ierr);
3619 
3620   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3621   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3622 
3623   /* insert mat values of mpimat */
3624   /*----------------------------*/
3625   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3626   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3627   nextrow = buf_ri_k + merge->nrecv;
3628   nextai  = nextrow + merge->nrecv;
3629 
3630   for (k=0; k<merge->nrecv; k++){
3631     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3632     nrows = *(buf_ri_k[k]);
3633     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3634     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3635   }
3636 
3637   /* set values of ba */
3638   m = merge->rowmap.n;
3639   for (i=0; i<m; i++) {
3640     arow = owners[rank] + i;
3641     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3642     bnzi = bi[i+1] - bi[i];
3643     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3644 
3645     /* add local non-zero vals of this proc's seqmat into ba */
3646     anzi = ai[arow+1] - ai[arow];
3647     aj   = a->j + ai[arow];
3648     aa   = a->a + ai[arow];
3649     nextaj = 0;
3650     for (j=0; nextaj<anzi; j++){
3651       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3652         ba_i[j] += aa[nextaj++];
3653       }
3654     }
3655 
3656     /* add received vals into ba */
3657     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3658       /* i-th row */
3659       if (i == *nextrow[k]) {
3660         anzi = *(nextai[k]+1) - *nextai[k];
3661         aj   = buf_rj[k] + *(nextai[k]);
3662         aa   = abuf_r[k] + *(nextai[k]);
3663         nextaj = 0;
3664         for (j=0; nextaj<anzi; j++){
3665           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3666             ba_i[j] += aa[nextaj++];
3667           }
3668         }
3669         nextrow[k]++; nextai[k]++;
3670       }
3671     }
3672     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3673   }
3674   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3675   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3676 
3677   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3678   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3679   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3680   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3681   PetscFunctionReturn(0);
3682 }
3683 
3684 static PetscEvent logkey_seqstompisym = 0;
3685 #undef __FUNCT__
3686 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3687 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3688 {
3689   PetscErrorCode       ierr;
3690   Mat                  B_mpi;
3691   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3692   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3693   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3694   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3695   PetscInt             len,proc,*dnz,*onz;
3696   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3697   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3698   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3699   MPI_Status           *status;
3700   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3701   PetscBT              lnkbt;
3702   Mat_Merge_SeqsToMPI  *merge;
3703   PetscContainer       container;
3704 
3705   PetscFunctionBegin;
3706   if (!logkey_seqstompisym) {
3707     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3708   }
3709   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3710 
3711   /* make sure it is a PETSc comm */
3712   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3713   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3714   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3715 
3716   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3717   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3718 
3719   /* determine row ownership */
3720   /*---------------------------------------------------------*/
3721   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3722   merge->rowmap.n = m;
3723   merge->rowmap.N = M;
3724   merge->rowmap.bs = 1;
3725   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3726   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3727   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3728 
3729   m      = merge->rowmap.n;
3730   M      = merge->rowmap.N;
3731   owners = merge->rowmap.range;
3732 
3733   /* determine the number of messages to send, their lengths */
3734   /*---------------------------------------------------------*/
3735   len_s  = merge->len_s;
3736 
3737   len = 0;  /* length of buf_si[] */
3738   merge->nsend = 0;
3739   for (proc=0; proc<size; proc++){
3740     len_si[proc] = 0;
3741     if (proc == rank){
3742       len_s[proc] = 0;
3743     } else {
3744       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3745       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3746     }
3747     if (len_s[proc]) {
3748       merge->nsend++;
3749       nrows = 0;
3750       for (i=owners[proc]; i<owners[proc+1]; i++){
3751         if (ai[i+1] > ai[i]) nrows++;
3752       }
3753       len_si[proc] = 2*(nrows+1);
3754       len += len_si[proc];
3755     }
3756   }
3757 
3758   /* determine the number and length of messages to receive for ij-structure */
3759   /*-------------------------------------------------------------------------*/
3760   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3761   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3762 
3763   /* post the Irecv of j-structure */
3764   /*-------------------------------*/
3765   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3766   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3767 
3768   /* post the Isend of j-structure */
3769   /*--------------------------------*/
3770   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3771   sj_waits = si_waits + merge->nsend;
3772 
3773   for (proc=0, k=0; proc<size; proc++){
3774     if (!len_s[proc]) continue;
3775     i = owners[proc];
3776     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3777     k++;
3778   }
3779 
3780   /* receives and sends of j-structure are complete */
3781   /*------------------------------------------------*/
3782   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3783   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3784 
3785   /* send and recv i-structure */
3786   /*---------------------------*/
3787   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3788   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3789 
3790   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3791   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3792   for (proc=0,k=0; proc<size; proc++){
3793     if (!len_s[proc]) continue;
3794     /* form outgoing message for i-structure:
3795          buf_si[0]:                 nrows to be sent
3796                [1:nrows]:           row index (global)
3797                [nrows+1:2*nrows+1]: i-structure index
3798     */
3799     /*-------------------------------------------*/
3800     nrows = len_si[proc]/2 - 1;
3801     buf_si_i    = buf_si + nrows+1;
3802     buf_si[0]   = nrows;
3803     buf_si_i[0] = 0;
3804     nrows = 0;
3805     for (i=owners[proc]; i<owners[proc+1]; i++){
3806       anzi = ai[i+1] - ai[i];
3807       if (anzi) {
3808         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3809         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3810         nrows++;
3811       }
3812     }
3813     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3814     k++;
3815     buf_si += len_si[proc];
3816   }
3817 
3818   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3819   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3820 
3821   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3822   for (i=0; i<merge->nrecv; i++){
3823     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3824   }
3825 
3826   ierr = PetscFree(len_si);CHKERRQ(ierr);
3827   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3828   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3829   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3830   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3831   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3832   ierr = PetscFree(status);CHKERRQ(ierr);
3833 
3834   /* compute a local seq matrix in each processor */
3835   /*----------------------------------------------*/
3836   /* allocate bi array and free space for accumulating nonzero column info */
3837   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3838   bi[0] = 0;
3839 
3840   /* create and initialize a linked list */
3841   nlnk = N+1;
3842   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3843 
3844   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3845   len = 0;
3846   len  = ai[owners[rank+1]] - ai[owners[rank]];
3847   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3848   current_space = free_space;
3849 
3850   /* determine symbolic info for each local row */
3851   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3852   nextrow = buf_ri_k + merge->nrecv;
3853   nextai  = nextrow + merge->nrecv;
3854   for (k=0; k<merge->nrecv; k++){
3855     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3856     nrows = *buf_ri_k[k];
3857     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3858     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3859   }
3860 
3861   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3862   len = 0;
3863   for (i=0;i<m;i++) {
3864     bnzi   = 0;
3865     /* add local non-zero cols of this proc's seqmat into lnk */
3866     arow   = owners[rank] + i;
3867     anzi   = ai[arow+1] - ai[arow];
3868     aj     = a->j + ai[arow];
3869     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3870     bnzi += nlnk;
3871     /* add received col data into lnk */
3872     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3873       if (i == *nextrow[k]) { /* i-th row */
3874         anzi = *(nextai[k]+1) - *nextai[k];
3875         aj   = buf_rj[k] + *nextai[k];
3876         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3877         bnzi += nlnk;
3878         nextrow[k]++; nextai[k]++;
3879       }
3880     }
3881     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3882 
3883     /* if free space is not available, make more free space */
3884     if (current_space->local_remaining<bnzi) {
3885       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3886       nspacedouble++;
3887     }
3888     /* copy data into free space, then initialize lnk */
3889     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3890     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3891 
3892     current_space->array           += bnzi;
3893     current_space->local_used      += bnzi;
3894     current_space->local_remaining -= bnzi;
3895 
3896     bi[i+1] = bi[i] + bnzi;
3897   }
3898 
3899   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3900 
3901   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3902   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3903   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3904 
3905   /* create symbolic parallel matrix B_mpi */
3906   /*---------------------------------------*/
3907   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3908   if (n==PETSC_DECIDE) {
3909     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3910   } else {
3911     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3912   }
3913   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3914   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3915   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3916 
3917   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3918   B_mpi->assembled     = PETSC_FALSE;
3919   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3920   merge->bi            = bi;
3921   merge->bj            = bj;
3922   merge->buf_ri        = buf_ri;
3923   merge->buf_rj        = buf_rj;
3924   merge->coi           = PETSC_NULL;
3925   merge->coj           = PETSC_NULL;
3926   merge->owners_co     = PETSC_NULL;
3927 
3928   /* attach the supporting struct to B_mpi for reuse */
3929   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3930   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3931   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3932   *mpimat = B_mpi;
3933 
3934   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3935   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3936   PetscFunctionReturn(0);
3937 }
3938 
3939 static PetscEvent logkey_seqstompi = 0;
3940 #undef __FUNCT__
3941 #define __FUNCT__ "MatMerge_SeqsToMPI"
3942 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3943 {
3944   PetscErrorCode   ierr;
3945 
3946   PetscFunctionBegin;
3947   if (!logkey_seqstompi) {
3948     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3949   }
3950   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3951   if (scall == MAT_INITIAL_MATRIX){
3952     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3953   }
3954   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3955   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3956   PetscFunctionReturn(0);
3957 }
3958 static PetscEvent logkey_getlocalmat = 0;
3959 #undef __FUNCT__
3960 #define __FUNCT__ "MatGetLocalMat"
3961 /*@C
3962      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3963 
3964     Not Collective
3965 
3966    Input Parameters:
3967 +    A - the matrix
3968 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3969 
3970    Output Parameter:
3971 .    A_loc - the local sequential matrix generated
3972 
3973     Level: developer
3974 
3975 @*/
3976 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3977 {
3978   PetscErrorCode  ierr;
3979   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3980   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3981   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3982   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3983   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3984   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3985 
3986   PetscFunctionBegin;
3987   if (!logkey_getlocalmat) {
3988     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3989   }
3990   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3991   if (scall == MAT_INITIAL_MATRIX){
3992     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3993     ci[0] = 0;
3994     for (i=0; i<am; i++){
3995       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3996     }
3997     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3998     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3999     k = 0;
4000     for (i=0; i<am; i++) {
4001       ncols_o = bi[i+1] - bi[i];
4002       ncols_d = ai[i+1] - ai[i];
4003       /* off-diagonal portion of A */
4004       for (jo=0; jo<ncols_o; jo++) {
4005         col = cmap[*bj];
4006         if (col >= cstart) break;
4007         cj[k]   = col; bj++;
4008         ca[k++] = *ba++;
4009       }
4010       /* diagonal portion of A */
4011       for (j=0; j<ncols_d; j++) {
4012         cj[k]   = cstart + *aj++;
4013         ca[k++] = *aa++;
4014       }
4015       /* off-diagonal portion of A */
4016       for (j=jo; j<ncols_o; j++) {
4017         cj[k]   = cmap[*bj++];
4018         ca[k++] = *ba++;
4019       }
4020     }
4021     /* put together the new matrix */
4022     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4023     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4024     /* Since these are PETSc arrays, change flags to free them as necessary. */
4025     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4026     mat->free_a  = PETSC_TRUE;
4027     mat->free_ij = PETSC_TRUE;
4028     mat->nonew   = 0;
4029   } else if (scall == MAT_REUSE_MATRIX){
4030     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4031     ci = mat->i; cj = mat->j; ca = mat->a;
4032     for (i=0; i<am; i++) {
4033       /* off-diagonal portion of A */
4034       ncols_o = bi[i+1] - bi[i];
4035       for (jo=0; jo<ncols_o; jo++) {
4036         col = cmap[*bj];
4037         if (col >= cstart) break;
4038         *ca++ = *ba++; bj++;
4039       }
4040       /* diagonal portion of A */
4041       ncols_d = ai[i+1] - ai[i];
4042       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4043       /* off-diagonal portion of A */
4044       for (j=jo; j<ncols_o; j++) {
4045         *ca++ = *ba++; bj++;
4046       }
4047     }
4048   } else {
4049     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4050   }
4051 
4052   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4053   PetscFunctionReturn(0);
4054 }
4055 
4056 static PetscEvent logkey_getlocalmatcondensed = 0;
4057 #undef __FUNCT__
4058 #define __FUNCT__ "MatGetLocalMatCondensed"
4059 /*@C
4060      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4061 
4062     Not Collective
4063 
4064    Input Parameters:
4065 +    A - the matrix
4066 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4067 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4068 
4069    Output Parameter:
4070 .    A_loc - the local sequential matrix generated
4071 
4072     Level: developer
4073 
4074 @*/
4075 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4076 {
4077   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4078   PetscErrorCode    ierr;
4079   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4080   IS                isrowa,iscola;
4081   Mat               *aloc;
4082 
4083   PetscFunctionBegin;
4084   if (!logkey_getlocalmatcondensed) {
4085     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4086   }
4087   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4088   if (!row){
4089     start = A->rmap.rstart; end = A->rmap.rend;
4090     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4091   } else {
4092     isrowa = *row;
4093   }
4094   if (!col){
4095     start = A->cmap.rstart;
4096     cmap  = a->garray;
4097     nzA   = a->A->cmap.n;
4098     nzB   = a->B->cmap.n;
4099     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4100     ncols = 0;
4101     for (i=0; i<nzB; i++) {
4102       if (cmap[i] < start) idx[ncols++] = cmap[i];
4103       else break;
4104     }
4105     imark = i;
4106     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4107     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4108     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4109     ierr = PetscFree(idx);CHKERRQ(ierr);
4110   } else {
4111     iscola = *col;
4112   }
4113   if (scall != MAT_INITIAL_MATRIX){
4114     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4115     aloc[0] = *A_loc;
4116   }
4117   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4118   *A_loc = aloc[0];
4119   ierr = PetscFree(aloc);CHKERRQ(ierr);
4120   if (!row){
4121     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4122   }
4123   if (!col){
4124     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4125   }
4126   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4127   PetscFunctionReturn(0);
4128 }
4129 
4130 static PetscEvent logkey_GetBrowsOfAcols = 0;
4131 #undef __FUNCT__
4132 #define __FUNCT__ "MatGetBrowsOfAcols"
4133 /*@C
4134     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4135 
4136     Collective on Mat
4137 
4138    Input Parameters:
4139 +    A,B - the matrices in mpiaij format
4140 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4141 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4142 
4143    Output Parameter:
4144 +    rowb, colb - index sets of rows and columns of B to extract
4145 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
4146 -    B_seq - the sequential matrix generated
4147 
4148     Level: developer
4149 
4150 @*/
4151 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4152 {
4153   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4154   PetscErrorCode    ierr;
4155   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4156   IS                isrowb,iscolb;
4157   Mat               *bseq;
4158 
4159   PetscFunctionBegin;
4160   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4161     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4162   }
4163   if (!logkey_GetBrowsOfAcols) {
4164     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
4165   }
4166   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4167 
4168   if (scall == MAT_INITIAL_MATRIX){
4169     start = A->cmap.rstart;
4170     cmap  = a->garray;
4171     nzA   = a->A->cmap.n;
4172     nzB   = a->B->cmap.n;
4173     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4174     ncols = 0;
4175     for (i=0; i<nzB; i++) {  /* row < local row index */
4176       if (cmap[i] < start) idx[ncols++] = cmap[i];
4177       else break;
4178     }
4179     imark = i;
4180     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4181     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4182     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4183     ierr = PetscFree(idx);CHKERRQ(ierr);
4184     *brstart = imark;
4185     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4186   } else {
4187     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4188     isrowb = *rowb; iscolb = *colb;
4189     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4190     bseq[0] = *B_seq;
4191   }
4192   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4193   *B_seq = bseq[0];
4194   ierr = PetscFree(bseq);CHKERRQ(ierr);
4195   if (!rowb){
4196     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4197   } else {
4198     *rowb = isrowb;
4199   }
4200   if (!colb){
4201     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4202   } else {
4203     *colb = iscolb;
4204   }
4205   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4206   PetscFunctionReturn(0);
4207 }
4208 
4209 static PetscEvent logkey_GetBrowsOfAocols = 0;
4210 #undef __FUNCT__
4211 #define __FUNCT__ "MatGetBrowsOfAoCols"
4212 /*@C
4213     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4214     of the OFF-DIAGONAL portion of local A
4215 
4216     Collective on Mat
4217 
4218    Input Parameters:
4219 +    A,B - the matrices in mpiaij format
4220 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4221 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4222 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4223 
4224    Output Parameter:
4225 +    B_oth - the sequential matrix generated
4226 
4227     Level: developer
4228 
4229 @*/
4230 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4231 {
4232   VecScatter_MPI_General *gen_to,*gen_from;
4233   PetscErrorCode         ierr;
4234   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4235   Mat_SeqAIJ             *b_oth;
4236   VecScatter             ctx=a->Mvctx;
4237   MPI_Comm               comm=ctx->comm;
4238   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4239   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4240   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4241   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4242   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4243   MPI_Status             *sstatus,rstatus;
4244   PetscMPIInt            jj;
4245   PetscInt               *cols,sbs,rbs;
4246   PetscScalar            *vals;
4247 
4248   PetscFunctionBegin;
4249   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4250     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4251   }
4252   if (!logkey_GetBrowsOfAocols) {
4253     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
4254   }
4255   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4256   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4257 
4258   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4259   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4260   rvalues  = gen_from->values; /* holds the length of receiving row */
4261   svalues  = gen_to->values;   /* holds the length of sending row */
4262   nrecvs   = gen_from->n;
4263   nsends   = gen_to->n;
4264 
4265   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4266   srow     = gen_to->indices;   /* local row index to be sent */
4267   sstarts  = gen_to->starts;
4268   sprocs   = gen_to->procs;
4269   sstatus  = gen_to->sstatus;
4270   sbs      = gen_to->bs;
4271   rstarts  = gen_from->starts;
4272   rprocs   = gen_from->procs;
4273   rbs      = gen_from->bs;
4274 
4275   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4276   if (scall == MAT_INITIAL_MATRIX){
4277     /* i-array */
4278     /*---------*/
4279     /*  post receives */
4280     for (i=0; i<nrecvs; i++){
4281       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4282       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4283       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4284     }
4285 
4286     /* pack the outgoing message */
4287     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4288     rstartsj = sstartsj + nsends +1;
4289     sstartsj[0] = 0;  rstartsj[0] = 0;
4290     len = 0; /* total length of j or a array to be sent */
4291     k = 0;
4292     for (i=0; i<nsends; i++){
4293       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4294       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4295       for (j=0; j<nrows; j++) {
4296         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4297         for (l=0; l<sbs; l++){
4298           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4299           rowlen[j*sbs+l] = ncols;
4300           len += ncols;
4301           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4302         }
4303         k++;
4304       }
4305       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4306       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4307     }
4308     /* recvs and sends of i-array are completed */
4309     i = nrecvs;
4310     while (i--) {
4311       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4312     }
4313     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4314 
4315     /* allocate buffers for sending j and a arrays */
4316     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4317     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4318 
4319     /* create i-array of B_oth */
4320     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4321     b_othi[0] = 0;
4322     len = 0; /* total length of j or a array to be received */
4323     k = 0;
4324     for (i=0; i<nrecvs; i++){
4325       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4326       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4327       for (j=0; j<nrows; j++) {
4328         b_othi[k+1] = b_othi[k] + rowlen[j];
4329         len += rowlen[j]; k++;
4330       }
4331       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4332     }
4333 
4334     /* allocate space for j and a arrrays of B_oth */
4335     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4336     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4337 
4338     /* j-array */
4339     /*---------*/
4340     /*  post receives of j-array */
4341     for (i=0; i<nrecvs; i++){
4342       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4343       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4344     }
4345 
4346     /* pack the outgoing message j-array */
4347     k = 0;
4348     for (i=0; i<nsends; i++){
4349       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4350       bufJ = bufj+sstartsj[i];
4351       for (j=0; j<nrows; j++) {
4352         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4353         for (ll=0; ll<sbs; ll++){
4354           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4355           for (l=0; l<ncols; l++){
4356             *bufJ++ = cols[l];
4357           }
4358           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4359         }
4360       }
4361       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4362     }
4363 
4364     /* recvs and sends of j-array are completed */
4365     i = nrecvs;
4366     while (i--) {
4367       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4368     }
4369     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4370   } else if (scall == MAT_REUSE_MATRIX){
4371     sstartsj = *startsj;
4372     rstartsj = sstartsj + nsends +1;
4373     bufa     = *bufa_ptr;
4374     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4375     b_otha   = b_oth->a;
4376   } else {
4377     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4378   }
4379 
4380   /* a-array */
4381   /*---------*/
4382   /*  post receives of a-array */
4383   for (i=0; i<nrecvs; i++){
4384     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4385     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4386   }
4387 
4388   /* pack the outgoing message a-array */
4389   k = 0;
4390   for (i=0; i<nsends; i++){
4391     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4392     bufA = bufa+sstartsj[i];
4393     for (j=0; j<nrows; j++) {
4394       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4395       for (ll=0; ll<sbs; ll++){
4396         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4397         for (l=0; l<ncols; l++){
4398           *bufA++ = vals[l];
4399         }
4400         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4401       }
4402     }
4403     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4404   }
4405   /* recvs and sends of a-array are completed */
4406   i = nrecvs;
4407   while (i--) {
4408     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4409   }
4410   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4411   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4412 
4413   if (scall == MAT_INITIAL_MATRIX){
4414     /* put together the new matrix */
4415     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4416 
4417     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4418     /* Since these are PETSc arrays, change flags to free them as necessary. */
4419     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4420     b_oth->free_a  = PETSC_TRUE;
4421     b_oth->free_ij = PETSC_TRUE;
4422     b_oth->nonew   = 0;
4423 
4424     ierr = PetscFree(bufj);CHKERRQ(ierr);
4425     if (!startsj || !bufa_ptr){
4426       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4427       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4428     } else {
4429       *startsj  = sstartsj;
4430       *bufa_ptr = bufa;
4431     }
4432   }
4433   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 #undef __FUNCT__
4438 #define __FUNCT__ "MatGetCommunicationStructs"
4439 /*@C
4440   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4441 
4442   Not Collective
4443 
4444   Input Parameters:
4445 . A - The matrix in mpiaij format
4446 
4447   Output Parameter:
4448 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4449 . colmap - A map from global column index to local index into lvec
4450 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4451 
4452   Level: developer
4453 
4454 @*/
4455 #if defined (PETSC_USE_CTABLE)
4456 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4457 #else
4458 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4459 #endif
4460 {
4461   Mat_MPIAIJ *a;
4462 
4463   PetscFunctionBegin;
4464   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4465   PetscValidPointer(lvec, 2)
4466   PetscValidPointer(colmap, 3)
4467   PetscValidPointer(multScatter, 4)
4468   a = (Mat_MPIAIJ *) A->data;
4469   if (lvec) *lvec = a->lvec;
4470   if (colmap) *colmap = a->colmap;
4471   if (multScatter) *multScatter = a->Mvctx;
4472   PetscFunctionReturn(0);
4473 }
4474 
4475 EXTERN_C_BEGIN
4476 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4477 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4478 EXTERN_C_END
4479 
4480 /*MC
4481    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4482 
4483    Options Database Keys:
4484 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4485 
4486   Level: beginner
4487 
4488 .seealso: MatCreateMPIAIJ
4489 M*/
4490 
4491 EXTERN_C_BEGIN
4492 #undef __FUNCT__
4493 #define __FUNCT__ "MatCreate_MPIAIJ"
4494 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4495 {
4496   Mat_MPIAIJ     *b;
4497   PetscErrorCode ierr;
4498   PetscMPIInt    size;
4499 
4500   PetscFunctionBegin;
4501   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4502 
4503   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4504   B->data         = (void*)b;
4505   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4506   B->factor       = 0;
4507   B->rmap.bs      = 1;
4508   B->assembled    = PETSC_FALSE;
4509   B->mapping      = 0;
4510 
4511   B->insertmode      = NOT_SET_VALUES;
4512   b->size            = size;
4513   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4514 
4515   /* build cache for off array entries formed */
4516   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4517   b->donotstash  = PETSC_FALSE;
4518   b->colmap      = 0;
4519   b->garray      = 0;
4520   b->roworiented = PETSC_TRUE;
4521 
4522   /* stuff used for matrix vector multiply */
4523   b->lvec      = PETSC_NULL;
4524   b->Mvctx     = PETSC_NULL;
4525 
4526   /* stuff for MatGetRow() */
4527   b->rowindices   = 0;
4528   b->rowvalues    = 0;
4529   b->getrowactive = PETSC_FALSE;
4530 
4531 
4532   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4533                                      "MatStoreValues_MPIAIJ",
4534                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4535   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4536                                      "MatRetrieveValues_MPIAIJ",
4537                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4538   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4539 				     "MatGetDiagonalBlock_MPIAIJ",
4540                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4541   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4542 				     "MatIsTranspose_MPIAIJ",
4543 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4544   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4545 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4546 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4547   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4548 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4549 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4550   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4551 				     "MatDiagonalScaleLocal_MPIAIJ",
4552 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4553   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4554                                      "MatConvert_MPIAIJ_MPICSRPERM",
4555                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4556   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4557                                      "MatConvert_MPIAIJ_MPICRL",
4558                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4559   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4560   PetscFunctionReturn(0);
4561 }
4562 EXTERN_C_END
4563 
4564 #undef __FUNCT__
4565 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4566 /*@C
4567      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4568          and "off-diagonal" part of the matrix in CSR format.
4569 
4570    Collective on MPI_Comm
4571 
4572    Input Parameters:
4573 +  comm - MPI communicator
4574 .  m - number of local rows (Cannot be PETSC_DECIDE)
4575 .  n - This value should be the same as the local size used in creating the
4576        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4577        calculated if N is given) For square matrices n is almost always m.
4578 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4579 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4580 .   i - row indices for "diagonal" portion of matrix
4581 .   j - column indices
4582 .   a - matrix values
4583 .   oi - row indices for "off-diagonal" portion of matrix
4584 .   oj - column indices
4585 -   oa - matrix values
4586 
4587    Output Parameter:
4588 .   mat - the matrix
4589 
4590    Level: advanced
4591 
4592    Notes:
4593        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4594 
4595        The i and j indices are 0 based
4596 
4597        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4598 
4599 
4600 .keywords: matrix, aij, compressed row, sparse, parallel
4601 
4602 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4603           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4604 @*/
4605 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4606 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4607 {
4608   PetscErrorCode ierr;
4609   Mat_MPIAIJ     *maij;
4610 
4611  PetscFunctionBegin;
4612   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4613   if (i[0]) {
4614     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4615   }
4616   if (oi[0]) {
4617     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4618   }
4619   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4620   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4621   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4622   maij = (Mat_MPIAIJ*) (*mat)->data;
4623   maij->donotstash     = PETSC_TRUE;
4624   (*mat)->preallocated = PETSC_TRUE;
4625 
4626   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4627   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4628   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4629 
4630   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4631   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4632 
4633   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4634   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4635   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4636   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4637 
4638   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4639   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4640   PetscFunctionReturn(0);
4641 }
4642 
4643 /*
4644     Special version for direct calls from Fortran
4645 */
4646 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4647 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4649 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4650 #endif
4651 
4652 /* Change these macros so can be used in void function */
4653 #undef CHKERRQ
4654 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4655 #undef SETERRQ2
4656 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4657 #undef SETERRQ
4658 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4659 
4660 EXTERN_C_BEGIN
4661 #undef __FUNCT__
4662 #define __FUNCT__ "matsetvaluesmpiaij_"
4663 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4664 {
4665   Mat            mat = *mmat;
4666   PetscInt       m = *mm, n = *mn;
4667   InsertMode     addv = *maddv;
4668   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4669   PetscScalar    value;
4670   PetscErrorCode ierr;
4671 
4672   MatPreallocated(mat);
4673   if (mat->insertmode == NOT_SET_VALUES) {
4674     mat->insertmode = addv;
4675   }
4676 #if defined(PETSC_USE_DEBUG)
4677   else if (mat->insertmode != addv) {
4678     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4679   }
4680 #endif
4681   {
4682   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4683   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4684   PetscTruth     roworiented = aij->roworiented;
4685 
4686   /* Some Variables required in the macro */
4687   Mat            A = aij->A;
4688   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4689   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4690   PetscScalar    *aa = a->a;
4691   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4692   Mat            B = aij->B;
4693   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4694   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4695   PetscScalar    *ba = b->a;
4696 
4697   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4698   PetscInt       nonew = a->nonew;
4699   PetscScalar    *ap1,*ap2;
4700 
4701   PetscFunctionBegin;
4702   for (i=0; i<m; i++) {
4703     if (im[i] < 0) continue;
4704 #if defined(PETSC_USE_DEBUG)
4705     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4706 #endif
4707     if (im[i] >= rstart && im[i] < rend) {
4708       row      = im[i] - rstart;
4709       lastcol1 = -1;
4710       rp1      = aj + ai[row];
4711       ap1      = aa + ai[row];
4712       rmax1    = aimax[row];
4713       nrow1    = ailen[row];
4714       low1     = 0;
4715       high1    = nrow1;
4716       lastcol2 = -1;
4717       rp2      = bj + bi[row];
4718       ap2      = ba + bi[row];
4719       rmax2    = bimax[row];
4720       nrow2    = bilen[row];
4721       low2     = 0;
4722       high2    = nrow2;
4723 
4724       for (j=0; j<n; j++) {
4725         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4726         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4727         if (in[j] >= cstart && in[j] < cend){
4728           col = in[j] - cstart;
4729           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4730         } else if (in[j] < 0) continue;
4731 #if defined(PETSC_USE_DEBUG)
4732         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4733 #endif
4734         else {
4735           if (mat->was_assembled) {
4736             if (!aij->colmap) {
4737               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4738             }
4739 #if defined (PETSC_USE_CTABLE)
4740             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4741 	    col--;
4742 #else
4743             col = aij->colmap[in[j]] - 1;
4744 #endif
4745             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4746               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4747               col =  in[j];
4748               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4749               B = aij->B;
4750               b = (Mat_SeqAIJ*)B->data;
4751               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4752               rp2      = bj + bi[row];
4753               ap2      = ba + bi[row];
4754               rmax2    = bimax[row];
4755               nrow2    = bilen[row];
4756               low2     = 0;
4757               high2    = nrow2;
4758               bm       = aij->B->rmap.n;
4759               ba = b->a;
4760             }
4761           } else col = in[j];
4762           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4763         }
4764       }
4765     } else {
4766       if (!aij->donotstash) {
4767         if (roworiented) {
4768           if (ignorezeroentries && v[i*n] == 0.0) continue;
4769           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4770         } else {
4771           if (ignorezeroentries && v[i] == 0.0) continue;
4772           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4773         }
4774       }
4775     }
4776   }}
4777   PetscFunctionReturnVoid();
4778 }
4779 EXTERN_C_END
4780 
4781