xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 2f85c6a59d752db987113db0b8a978ea7d4ca5ee)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local column number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
10 a slightly higher hash-table lookup cost; without it, it is not scalable (each
11 process holds an order-N integer array) but access is fast.
12 */
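/*
   Editorial sketch (not part of the library): how the colmap built below is
   consulted elsewhere in this file. gcol and lcol are hypothetical names; the
   +1/-1 shift lets a stored value of 0 mean "column absent".

       PetscInt gcol = ...;    global column index owned by another process
       PetscInt lcol;          local index into the off-diagonal block B
   #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
   #else
       lcol = aij->colmap[gcol] - 1;
   #endif
       if (lcol < 0) { the column is not present in this process's B block }
*/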
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
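/*
   Editorial sketch of the insertion strategy shared by the two macros above
   (illustrative only; the real macros additionally grow the row storage via
   MatSeqXAIJReallocateAIJ and honor the nonew and ignorezeroentries options).
   The row is kept sorted by column: bisect while the search window holds more
   than 5 entries, finish with a linear scan, then either combine with an
   existing entry or shift the tail up one slot and insert.

       static void InsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,
                                PetscInt col,PetscScalar value,InsertMode addv)
       {
         PetscInt low = 0, high = *nrow, t, i;
         while (high - low > 5) {
           t = (low + high)/2;
           if (cols[t] > col) high = t; else low = t;
         }
         for (i=low; i<high; i++) {
           if (cols[i] > col) break;
           if (cols[i] == col) {
             if (addv == ADD_VALUES) vals[i] += value; else vals[i] = value;
             return;
           }
         }
         for (t=*nrow-1; t>=i; t--) { cols[t+1] = cols[t]; vals[t+1] = vals[t]; }
         cols[i] = col; vals[i] = value; (*nrow)++;
       }
*/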
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
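/*
   Editorial note (an assumption implied by the copies above rather than stated):
   MatSetValuesRow_MPIAIJ expects v[] to hold the entire local row already
   sorted by global column, i.e.

       [ B entries left of the diagonal block | A (diagonal-block) entries |
         B entries right of the diagonal block ]

   which is why the l leading values go into B, the next block into A, and the
   remainder back into B.
*/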
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = a->ignorezeroentries;
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199             col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
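/*
   Minimal usage sketch (editorial; the Mat J and the indices are hypothetical):
   entries whose rows live on another process are stashed by the routine above
   and routed to their owners during assembly.

       PetscInt    rows[2] = {0, 500};        second row may be off-process
       PetscInt    cols[2] = {0, 500};
       PetscScalar vals[4] = {1.0, 2.0, 3.0, 4.0};
       ierr = MatSetValues(J,2,rows,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/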
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col--;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled; if so we must
346      also disassemble ourselves, in order that we may reassemble. */
347   /*
348      if the nonzero structure of submatrix B cannot change, then we know that
349      no processor disassembled and we can skip this step
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
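    /* editorial note: was_assembled is nonzero on ranks that have not disassembled,
       so the MPI_PROD reduction is zero exactly when some rank disassembled;
       a rank that is still assembled must then disassemble as well before reassembly */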
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_USE_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_USE_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_USE_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal, we use separate
502      code so that MatSetValues() is not called for each diagonal entry,
503      which would allocate new memory and incur many mallocs, slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
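/*
   Editorial sketch (summarizing the communication pattern above, not adding to
   it): each process first computes, for every other rank, how many of its
   requested rows that rank owns; PetscMaxSum() then tells each process how
   many messages it will receive and the length of the longest one; receives
   are posted with MPI_ANY_SOURCE before the sends are started. The work
   arrays are laid out as

       owner[i]       rank owning rows[i] (found by scanning A->rmap.range)
       nprocs[2*j]    number of row indices destined for rank j
       nprocs[2*j+1]  1 if anything at all is sent to rank j
       starts[j]      offset of rank j's block within the packed svalues[]
*/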
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
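/*
   Editorial note: MatMult_MPIAIJ overlaps communication with computation.
   With A_d the diagonal block, B_o the off-diagonal block, and x_ghost the
   scattered ghost values of x,

       y = A_d * x_local + B_o * x_ghost

   the scatter filling the ghost vector lvec is started first, the local
   product A_d*x is formed while messages are in flight, and the B_o*lvec
   contribution is added once the scatter has completed.
*/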
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added into yy until the next line */
594     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
600     /* values were actually received in the Begin() but we still need to call this no-op */
601     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
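/*
   Minimal usage sketch (editorial; Amat and Bmat are hypothetical, assembled
   MPIAIJ matrices): the public entry point dispatches to the routine above.

       PetscTruth flg;
       ierr = MatIsTranspose(Amat,Bmat,1.e-10,&flg);CHKERRQ(ierr);
       if (flg) { Bmat equals the transpose of Amat to within the tolerance }
*/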
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
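/*
   Editorial note: the binary stream produced above consists of (as implied by
   the code, with integers written as PETSC_INT and values as PETSC_SCALAR):

       header[4]       MAT_FILE_COOKIE, global rows M, global cols N, global nnz
       row lengths     M integers, process 0 first, then each rank in turn
       column indices  nnz global column indices, each local row emitted in
                       ascending order (B entries left of the diagonal block,
                       then the A block shifted by cstart, then the rest of B)
       values          nnz scalars in exactly the same order
*/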
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891                                                 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894                                                 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
903       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
904       PetscFunctionReturn(0);
905     } else if (format == PETSC_VIEWER_ASCII_INFO) {
906       PetscInt   inodecount,inodelimit,*inodes;
907       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
908       if (inodes) {
909         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
910       } else {
911         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
912       }
913       PetscFunctionReturn(0);
914     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915       PetscFunctionReturn(0);
916     }
917   } else if (isbinary) {
918     if (size == 1) {
919       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
920       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
921     } else {
922       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
923     }
924     PetscFunctionReturn(0);
925   } else if (isdraw) {
926     PetscDraw  draw;
927     PetscTruth isnull;
928     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
929     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
930   }
931 
932   if (size == 1) {
933     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
934     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935   } else {
936     /* assemble the entire matrix onto first processor. */
937     Mat         A;
938     Mat_SeqAIJ  *Aloc;
939     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940     PetscScalar *a;
941 
942     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
943     if (!rank) {
944       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
945     } else {
946       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
947     }
948     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
950     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
951     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
952 
953     /* copy over the A part */
954     Aloc = (Mat_SeqAIJ*)aij->A->data;
955     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956     row = mat->rmap.rstart;
957     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
958     for (i=0; i<m; i++) {
959       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
960       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961     }
962     aj = Aloc->j;
963     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
964 
965     /* copy over the B part */
966     Aloc = (Mat_SeqAIJ*)aij->B->data;
967     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968     row  = mat->rmap.rstart;
969     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
970     ct   = cols;
971     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972     for (i=0; i<m; i++) {
973       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
974       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975     }
976     ierr = PetscFree(ct);CHKERRQ(ierr);
977     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979     /*
980        Everyone has to call to draw the matrix since the graphics waits are
981        synchronized across all processors that share the PetscDraw object
982     */
983     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
984     if (!rank) {
985       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
986       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
987     }
988     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
989     ierr = MatDestroy(A);CHKERRQ(ierr);
990   }
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth     iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
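/*
   Editorial note: each outer iteration above is a block Jacobi step with a
   local SOR solve. Writing A = A_d + B_o (diagonal and off-diagonal blocks)
   and x_ghost for the scattered ghost values of x,

       bb1 = bb - B_o * x_ghost
       x   = SOR(A_d, bb1, omega, fshift, lits local sweeps)

   so only ghost values of x are communicated between outer iterations.
*/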
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is simpler:
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1135   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.n;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NEW_NONZERO_LOCATIONS:
1207   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1208   case MAT_KEEP_ZEROED_ROWS:
1209   case MAT_NEW_NONZERO_LOCATION_ERR:
1210   case MAT_USE_INODES:
1211   case MAT_IGNORE_ZERO_ENTRIES:
1212     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1213     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1214     break;
1215   case MAT_ROW_ORIENTED:
1216     a->roworiented = flg;
1217     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1218     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1219     break;
1220   case MAT_NEW_DIAGONALS:
1221     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1222     break;
1223   case MAT_IGNORE_OFF_PROC_ENTRIES:
1224     a->donotstash = PETSC_TRUE;
1225     break;
1226   case MAT_SYMMETRIC:
1227     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1228     break;
1229   case MAT_STRUCTURALLY_SYMMETRIC:
1230   case MAT_HERMITIAN:
1231   case MAT_SYMMETRY_ETERNAL:
1232     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1233     break;
1234   default:
1235     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1236   }
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatGetRow_MPIAIJ"
1242 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1243 {
1244   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1245   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1246   PetscErrorCode ierr;
1247   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1248   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1249   PetscInt       *cmap,*idx_p;
1250 
1251   PetscFunctionBegin;
1252   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1253   mat->getrowactive = PETSC_TRUE;
1254 
1255   if (!mat->rowvalues && (idx || v)) {
1256     /*
1257         allocate enough space to hold information from the longest row.
1258     */
1259     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1260     PetscInt     max = 1,tmp;
1261     for (i=0; i<matin->rmap.n; i++) {
1262       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1263       if (max < tmp) { max = tmp; }
1264     }
1265     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1266     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1267   }
1268 
1269   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1270   lrow = row - rstart;
1271 
1272   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1273   if (!v)   {pvA = 0; pvB = 0;}
1274   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1275   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1276   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1277   nztot = nzA + nzB;
1278 
1279   cmap  = mat->garray;
1280   if (v  || idx) {
1281     if (nztot) {
1282       /* Sort by increasing column numbers, assuming A and B already sorted */
1283       PetscInt imark = -1;
1284       if (v) {
1285         *v = v_p = mat->rowvalues;
1286         for (i=0; i<nzB; i++) {
1287           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1288           else break;
1289         }
1290         imark = i;
1291         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1292         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1293       }
1294       if (idx) {
1295         *idx = idx_p = mat->rowindices;
1296         if (imark > -1) {
1297           for (i=0; i<imark; i++) {
1298             idx_p[i] = cmap[cworkB[i]];
1299           }
1300         } else {
1301           for (i=0; i<nzB; i++) {
1302             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1303             else break;
1304           }
1305           imark = i;
1306         }
1307         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1308         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1309       }
1310     } else {
1311       if (idx) *idx = 0;
1312       if (v)   *v   = 0;
1313     }
1314   }
1315   *nz = nztot;
1316   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1317   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
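/*
   Minimal usage sketch (editorial): the standard get/restore pairing. The
   column indices returned for an MPIAIJ matrix are global, merged in
   increasing order from the B (off-diagonal) and A (diagonal) blocks.

       PetscInt    ncols,*cols;
       PetscScalar *vals;
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[0..ncols-1] and vals[0..ncols-1] ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/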
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1323 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1324 {
1325   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1326 
1327   PetscFunctionBegin;
1328   if (!aij->getrowactive) {
1329     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1330   }
1331   aij->getrowactive = PETSC_FALSE;
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatNorm_MPIAIJ"
1337 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1338 {
1339   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1340   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1341   PetscErrorCode ierr;
1342   PetscInt       i,j,cstart = mat->cmap.rstart;
1343   PetscReal      sum = 0.0;
1344   PetscScalar    *v;
1345 
1346   PetscFunctionBegin;
1347   if (aij->size == 1) {
1348     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1349   } else {
1350     if (type == NORM_FROBENIUS) {
1351       v = amat->a;
1352       for (i=0; i<amat->nz; i++) {
1353 #if defined(PETSC_USE_COMPLEX)
1354         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1355 #else
1356         sum += (*v)*(*v); v++;
1357 #endif
1358       }
1359       v = bmat->a;
1360       for (i=0; i<bmat->nz; i++) {
1361 #if defined(PETSC_USE_COMPLEX)
1362         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1363 #else
1364         sum += (*v)*(*v); v++;
1365 #endif
1366       }
1367       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1368       *norm = sqrt(*norm);
1369     } else if (type == NORM_1) { /* max column norm */
1370       PetscReal *tmp,*tmp2;
1371       PetscInt    *jj,*garray = aij->garray;
1372       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1373       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1374       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1375       *norm = 0.0;
1376       v = amat->a; jj = amat->j;
1377       for (j=0; j<amat->nz; j++) {
1378         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1379       }
1380       v = bmat->a; jj = bmat->j;
1381       for (j=0; j<bmat->nz; j++) {
1382         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1383       }
1384       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1385       for (j=0; j<mat->cmap.N; j++) {
1386         if (tmp2[j] > *norm) *norm = tmp2[j];
1387       }
1388       ierr = PetscFree(tmp);CHKERRQ(ierr);
1389       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1390     } else if (type == NORM_INFINITY) { /* max row norm */
1391       PetscReal ntemp = 0.0;
1392       for (j=0; j<aij->A->rmap.n; j++) {
1393         v = amat->a + amat->i[j];
1394         sum = 0.0;
1395         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1396           sum += PetscAbsScalar(*v); v++;
1397         }
1398         v = bmat->a + bmat->i[j];
1399         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1400           sum += PetscAbsScalar(*v); v++;
1401         }
1402         if (sum > ntemp) ntemp = sum;
1403       }
1404       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1405     } else {
1406       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1407     }
1408   }
1409   PetscFunctionReturn(0);
1410 }
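/*
   Editor's sketch (guarded by #if 0): requesting the norms computed above.
   Each process sums over its diagonal (A) and off-diagonal (B) parts, and the
   global value comes from the MPI_Allreduce() calls; NORM_2 is not supported
   for this matrix type. Variable names are illustrative.
*/
#if 0
PetscReal fro,colmax,rowmax;
ierr = MatNorm(mat,NORM_FROBENIUS,&fro);CHKERRQ(ierr);
ierr = MatNorm(mat,NORM_1,&colmax);CHKERRQ(ierr);        /* largest column sum */
ierr = MatNorm(mat,NORM_INFINITY,&rowmax);CHKERRQ(ierr); /* largest row sum */
#endif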
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatTranspose_MPIAIJ"
1414 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1415 {
1416   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1417   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1418   PetscErrorCode ierr;
1419   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1420   PetscInt       cstart=A->cmap.rstart,ncol;
1421   Mat            B;
1422   PetscScalar    *array;
1423 
1424   PetscFunctionBegin;
1425   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1426 
1427   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1428   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1429   ai = Aloc->i; aj = Aloc->j;
1430   bi = Bloc->i; bj = Bloc->j;
1431   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1432   cols = d_nnz + na + 1; /* work space to be used by B part */
1433   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1434   for (i=0; i<ai[ma]; i++){
1435     d_nnz[aj[i]] ++;
1436     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1437   }
1438 
1439   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1440   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1441   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1442   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1443 
1444   /* copy over the A part */
1445   array = Aloc->a;
1446   row = A->rmap.rstart;
1447   for (i=0; i<ma; i++) {
1448     ncol = ai[i+1]-ai[i];
1449     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1450     row++; array += ncol; aj += ncol;
1451   }
1452   aj = Aloc->j;
1453   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1454 
1455   /* copy over the B part */
1456   array = Bloc->a;
1457   row = A->rmap.rstart;
1458   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1459   for (i=0; i<mb; i++) {
1460     ncol = bi[i+1]-bi[i];
1461     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1462     row++; array += ncol; cols += ncol;
1463   }
1464   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1465   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1466   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1467   if (matout) {
1468     *matout = B;
1469   } else {
1470     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
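/*
   Editor's sketch (guarded by #if 0): the two calling modes handled above.
   Passing PETSC_NULL requests an in-place transpose, which the size check at
   the top of the routine restricts to square matrices.
*/
#if 0
Mat At;
ierr = MatTranspose(A,&At);CHKERRQ(ierr);        /* out-of-place: At = A^T */
ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr); /* in-place: requires M == N */
#endif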
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1477 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1478 {
1479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1480   Mat            a = aij->A,b = aij->B;
1481   PetscErrorCode ierr;
1482   PetscInt       s1,s2,s3;
1483 
1484   PetscFunctionBegin;
1485   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1486   if (rr) {
1487     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1488     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1489     /* Overlap communication with computation. */
1490     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491   }
1492   if (ll) {
1493     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1494     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1495     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1496   }
1497   /* scale the diagonal block */
1498   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1499 
1500   if (rr) {
1501     /* Do a scatter end and then right scale the off-diagonal block */
1502     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1504   }
1505 
1506   PetscFunctionReturn(0);
1507 }
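/*
   Editor's sketch (guarded by #if 0): the routine above hides the scatter of
   the right-scaling vector behind the local left-scaling work. From user code
   the whole operation is simply mat = diag(ll) * mat * diag(rr), with
   PETSC_NULL skipping either side; vector names are illustrative.
*/
#if 0
ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);         /* scale both sides */
ierr = MatDiagonalScale(mat,PETSC_NULL,rr);CHKERRQ(ierr); /* columns only */
#endif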
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1511 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1512 {
1513   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1514   PetscErrorCode ierr;
1515 
1516   PetscFunctionBegin;
1517   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1518   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1523 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1524 {
1525   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1526   PetscErrorCode ierr;
1527 
1528   PetscFunctionBegin;
1529   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatEqual_MPIAIJ"
1535 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1536 {
1537   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1538   Mat            a,b,c,d;
1539   PetscTruth     flg;
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   a = matA->A; b = matA->B;
1544   c = matB->A; d = matB->B;
1545 
1546   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1547   if (flg) {
1548     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1549   }
1550   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatCopy_MPIAIJ"
1556 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1557 {
1558   PetscErrorCode ierr;
1559   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1560   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1561 
1562   PetscFunctionBegin;
1563   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1564   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1565     /* Because of the column compression in the off-processor part of the matrix a->B,
1566        the number of columns in a->B and b->B may be different; hence we cannot call
1567        MatCopy() directly on the two parts. If need be, a copy more efficient than
1568        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1569        and then copying the submatrices */
1570     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1571   } else {
1572     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1573     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1574   }
1575   PetscFunctionReturn(0);
1576 }
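/*
   Editor's sketch (guarded by #if 0): selecting the copy path above. The fast
   part-wise copy is taken only when both matrices share the nonzero pattern
   (and the same copy implementation); otherwise MatCopy_Basic() is used.
*/
#if 0
ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);      /* fast path */
ierr = MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* general path */
#endif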
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1580 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1581 {
1582   PetscErrorCode ierr;
1583 
1584   PetscFunctionBegin;
1585   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #include "petscblaslapack.h"
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatAXPY_MPIAIJ"
1592 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1593 {
1594   PetscErrorCode ierr;
1595   PetscInt       i;
1596   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1597   PetscBLASInt   bnz,one=1;
1598   Mat_SeqAIJ     *x,*y;
1599 
1600   PetscFunctionBegin;
1601   if (str == SAME_NONZERO_PATTERN) {
1602     PetscScalar alpha = a;
1603     x = (Mat_SeqAIJ *)xx->A->data;
1604     y = (Mat_SeqAIJ *)yy->A->data;
1605     bnz = (PetscBLASInt)x->nz;
1606     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1607     x = (Mat_SeqAIJ *)xx->B->data;
1608     y = (Mat_SeqAIJ *)yy->B->data;
1609     bnz = (PetscBLASInt)x->nz;
1610     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1611   } else if (str == SUBSET_NONZERO_PATTERN) {
1612     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1613 
1614     x = (Mat_SeqAIJ *)xx->B->data;
1615     y = (Mat_SeqAIJ *)yy->B->data;
1616     if (y->xtoy && y->XtoY != xx->B) {
1617       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1618       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1619     }
1620     if (!y->xtoy) { /* get xtoy */
1621       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1622       y->XtoY = xx->B;
1623       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1624     }
1625     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1626   } else {
1627     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1628   }
1629   PetscFunctionReturn(0);
1630 }
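/*
   Editor's sketch (guarded by #if 0): Y = Y + a*X with the three structure
   flags dispatched above: BLAS axpy on the raw value arrays for identical
   patterns, the cached xtoy index map when X's pattern is a subset of Y's,
   and MatAXPY_Basic() otherwise.
*/
#if 0
ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
ierr = MatAXPY(Y,a,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);    /* X's pattern inside Y's */
ierr = MatAXPY(Y,a,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
#endif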
1631 
1632 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatConjugate_MPIAIJ"
1636 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1637 {
1638 #if defined(PETSC_USE_COMPLEX)
1639   PetscErrorCode ierr;
1640   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1641 
1642   PetscFunctionBegin;
1643   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1644   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1645 #else
1646   PetscFunctionBegin;
1647 #endif
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatRealPart_MPIAIJ"
1653 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1654 {
1655   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1656   PetscErrorCode ierr;
1657 
1658   PetscFunctionBegin;
1659   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1660   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1666 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1667 {
1668   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1669   PetscErrorCode ierr;
1670 
1671   PetscFunctionBegin;
1672   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1673   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #ifdef PETSC_HAVE_PBGL
1678 
1679 #include <boost/parallel/mpi/bsp_process_group.hpp>
1680 #include <boost/graph/distributed/ilu_default_graph.hpp>
1681 #include <boost/graph/distributed/ilu_0_block.hpp>
1682 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1683 #include <boost/graph/distributed/petsc/interface.hpp>
1684 #include <boost/multi_array.hpp>
1685 #include <boost/parallel/distributed_property_map.hpp>
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1689 /*
1690   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1691 */
1692 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1693 {
1694   namespace petsc = boost::distributed::petsc;
1695 
1696   namespace graph_dist = boost::graph::distributed;
1697   using boost::graph::distributed::ilu_default::process_group_type;
1698   using boost::graph::ilu_permuted;
1699 
1700   PetscTruth      row_identity, col_identity;
1701   PetscContainer  c;
1702   PetscInt        m, n, M, N;
1703   PetscErrorCode  ierr;
1704 
1705   PetscFunctionBegin;
1706   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU");
1707   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1708   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1709   if (!row_identity || !col_identity) {
1710     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1711   }
1712 
1713   process_group_type pg;
1714   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1715   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1716   lgraph_type&   level_graph = *lgraph_p;
1717   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1718 
1719   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1720   ilu_permuted(level_graph);
1721 
1722   /* put together the new matrix */
1723   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1724   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1725   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1726   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1727   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1728   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1729   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1730   (*fact)->factor = FACTOR_LU;
1731 
1732   ierr = PetscContainerCreate(A->comm, &c);CHKERRQ(ierr);
1733   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1734   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1740 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1741 {
1742   PetscFunctionBegin;
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatSolve_MPIAIJ"
1748 /*
1749   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1750 */
1751 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1752 {
1753   namespace graph_dist = boost::graph::distributed;
1754 
1755   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1756   lgraph_type*   lgraph_p;
1757   PetscContainer c;
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1762   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1763   ierr = VecCopy(b, x); CHKERRQ(ierr);
1764 
1765   PetscScalar* array_x;
1766   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1767   PetscInt sx;
1768   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1769 
1770   PetscScalar* array_b;
1771   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1772   PetscInt sb;
1773   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1774 
1775   lgraph_type&   level_graph = *lgraph_p;
1776   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1777 
1778   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1779   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1780                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1781 
1782   typedef boost::iterator_property_map<array_ref_type::iterator,
1783                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1784   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1785                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1786 
1787   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1788   ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
1789   PetscFunctionReturn(0);
1790 }
1791 #endif
1792 
1793 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1794   PetscInt       nzlocal,nsends,nrecvs;
1795   PetscMPIInt    *send_rank;
1796   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1797   PetscScalar    *sbuf_a,**rbuf_a;
1798   PetscErrorCode (*MatDestroy)(Mat);
1799 } Mat_Redundant;
1800 
1801 #undef __FUNCT__
1802 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1803 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1804 {
1805   PetscErrorCode       ierr;
1806   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1807   PetscInt             i;
1808 
1809   PetscFunctionBegin;
1810   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1811   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1812   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1813   for (i=0; i<redund->nrecvs; i++){
1814     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1815     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1816   }
1817   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1818   ierr = PetscFree(redund);CHKERRQ(ierr);
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatDestroy_MatRedundant"
1824 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1825 {
1826   PetscErrorCode  ierr;
1827   PetscContainer  container;
1828   Mat_Redundant   *redund=PETSC_NULL;
1829 
1830   PetscFunctionBegin;
1831   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1832   if (container) {
1833     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1834   } else {
1835     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1836   }
1837   A->ops->destroy = redund->MatDestroy;
1838   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1839   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1840   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1846 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1847 {
1848   PetscMPIInt    rank,size;
1849   MPI_Comm       comm=mat->comm;
1850   PetscErrorCode ierr;
1851   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1852   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1853   PetscInt       *rowrange=mat->rmap.range;
1854   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1855   Mat            A=aij->A,B=aij->B,C=*matredundant;
1856   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1857   PetscScalar    *sbuf_a;
1858   PetscInt       nzlocal=a->nz+b->nz;
1859   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1860   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1861   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1862   PetscScalar    *vals,*aworkA,*aworkB;
1863   PetscMPIInt    tag1,tag2,tag3,imdex;
1864   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1865                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1866   MPI_Status     recv_status,*send_status;
1867   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1868   PetscInt       **rbuf_j=PETSC_NULL;
1869   PetscScalar    **rbuf_a=PETSC_NULL;
1870   Mat_Redundant  *redund=PETSC_NULL;
1871   PetscContainer container;
1872 
1873   PetscFunctionBegin;
1874   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1875   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1876 
1877   if (reuse == MAT_REUSE_MATRIX) {
1878     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1879     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1880     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1881     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1882     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1883     if (container) {
1884       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1885     } else {
1886       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1887     }
1888     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1889 
1890     nsends    = redund->nsends;
1891     nrecvs    = redund->nrecvs;
1892     send_rank = redund->send_rank; recv_rank = send_rank + size;
1893     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1894     sbuf_j    = redund->sbuf_j;
1895     sbuf_a    = redund->sbuf_a;
1896     rbuf_j    = redund->rbuf_j;
1897     rbuf_a    = redund->rbuf_a;
1898   }
1899 
1900   if (reuse == MAT_INITIAL_MATRIX){
1901     PetscMPIInt  subrank,subsize;
1902     PetscInt     nleftover,np_subcomm;
1903     /* get the destination processors' id send_rank, nsends and nrecvs */
1904     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1905     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1906     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
1907     recv_rank = send_rank + size;
1908     np_subcomm = size/nsubcomm;
1909     nleftover  = size - nsubcomm*np_subcomm;
1910     nsends = 0; nrecvs = 0;
1911     for (i=0; i<size; i++){ /* i=rank*/
1912       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1913         send_rank[nsends] = i; nsends++;
1914         recv_rank[nrecvs++] = i;
1915       }
1916     }
1917     if (rank >= size - nleftover){/* this proc is a leftover processor */
1918       i = size-nleftover-1;
1919       j = 0;
1920       while (j < nsubcomm - nleftover){
1921         send_rank[nsends++] = i;
1922         i--; j++;
1923       }
1924     }
1925 
1926     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1927       for (i=0; i<nleftover; i++){
1928         recv_rank[nrecvs++] = size-nleftover+i;
1929       }
1930     }
1931 
1932     /* allocate sbuf_j, sbuf_a */
1933     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1934     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1935     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1936   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1937 
1938   /* copy mat's local entries into the buffers */
1939   if (reuse == MAT_INITIAL_MATRIX){
1940     rownz_max = 0;
1941     rptr = sbuf_j;
1942     cols = sbuf_j + rend-rstart + 1;
1943     vals = sbuf_a;
1944     rptr[0] = 0;
1945     for (i=0; i<rend-rstart; i++){
1946       row = i + rstart;
1947       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1948       ncols  = nzA + nzB;
1949       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1950       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1951       /* load the column indices for this row into cols */
1952       lwrite = 0;
1953       for (l=0; l<nzB; l++) {
1954         if ((ctmp = bmap[cworkB[l]]) < cstart){
1955           vals[lwrite]   = aworkB[l];
1956           cols[lwrite++] = ctmp;
1957         }
1958       }
1959       for (l=0; l<nzA; l++){
1960         vals[lwrite]   = aworkA[l];
1961         cols[lwrite++] = cstart + cworkA[l];
1962       }
1963       for (l=0; l<nzB; l++) {
1964         if ((ctmp = bmap[cworkB[l]]) >= cend){
1965           vals[lwrite]   = aworkB[l];
1966           cols[lwrite++] = ctmp;
1967         }
1968       }
1969       vals += ncols;
1970       cols += ncols;
1971       rptr[i+1] = rptr[i] + ncols;
1972       if (rownz_max < ncols) rownz_max = ncols;
1973     }
1974     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1975   } else { /* only copy matrix values into sbuf_a */
1976     rptr = sbuf_j;
1977     vals = sbuf_a;
1978     rptr[0] = 0;
1979     for (i=0; i<rend-rstart; i++){
1980       row = i + rstart;
1981       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1982       ncols  = nzA + nzB;
1983       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1984       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1985       lwrite = 0;
1986       for (l=0; l<nzB; l++) {
1987         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1988       }
1989       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1990       for (l=0; l<nzB; l++) {
1991         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1992       }
1993       vals += ncols;
1994       rptr[i+1] = rptr[i] + ncols;
1995     }
1996   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1997 
1998   /* send nzlocal to others, and recv other's nzlocal */
1999   /*--------------------------------------------------*/
2000   if (reuse == MAT_INITIAL_MATRIX){
2001     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2002     s_waits2 = s_waits3 + nsends;
2003     s_waits1 = s_waits2 + nsends;
2004     r_waits1 = s_waits1 + nsends;
2005     r_waits2 = r_waits1 + nrecvs;
2006     r_waits3 = r_waits2 + nrecvs;
2007   } else {
2008     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2009     r_waits3 = s_waits3 + nsends;
2010   }
2011 
2012   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2013   if (reuse == MAT_INITIAL_MATRIX){
2014     /* get new tags to keep the communication clean */
2015     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2016     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2017     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2018     rbuf_nz = sbuf_nz + nsends;
2019 
2020     /* post receives of other's nzlocal */
2021     for (i=0; i<nrecvs; i++){
2022       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2023     }
2024     /* send nzlocal to others */
2025     for (i=0; i<nsends; i++){
2026       sbuf_nz[i] = nzlocal;
2027       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2028     }
2029     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2030     count = nrecvs;
2031     while (count) {
2032       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2033       recv_rank[imdex] = recv_status.MPI_SOURCE;
2034       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2035       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2036 
2037       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2038       rbuf_nz[imdex] += i + 2;
2039       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2040       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2041       count--;
2042     }
2043     /* wait on sends of nzlocal */
2044     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2045     /* send mat->i,j to others, and recv from other's */
2046     /*------------------------------------------------*/
2047     for (i=0; i<nsends; i++){
2048       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2049       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2050     }
2051     /* wait on receives of mat->i,j */
2052     /*------------------------------*/
2053     count = nrecvs;
2054     while (count) {
2055       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2056       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2057       count--;
2058     }
2059     /* wait on sends of mat->i,j */
2060     /*---------------------------*/
2061     if (nsends) {
2062       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2063     }
2064   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2065 
2066   /* post receives, send and receive mat->a */
2067   /*----------------------------------------*/
2068   for (imdex=0; imdex<nrecvs; imdex++) {
2069     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2070   }
2071   for (i=0; i<nsends; i++){
2072     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2073   }
2074   count = nrecvs;
2075   while (count) {
2076     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2077     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2078     count--;
2079   }
2080   if (nsends) {
2081     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2082   }
2083 
2084   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2085 
2086   /* create redundant matrix */
2087   /*-------------------------*/
2088   if (reuse == MAT_INITIAL_MATRIX){
2089     /* compute rownz_max for preallocation */
2090     for (imdex=0; imdex<nrecvs; imdex++){
2091       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2092       rptr = rbuf_j[imdex];
2093       for (i=0; i<j; i++){
2094         ncols = rptr[i+1] - rptr[i];
2095         if (rownz_max < ncols) rownz_max = ncols;
2096       }
2097     }
2098 
2099     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2100     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2101     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2102     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2103     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2104   } else {
2105     C = *matredundant;
2106   }
2107 
2108   /* insert local matrix entries */
2109   rptr = sbuf_j;
2110   cols = sbuf_j + rend-rstart + 1;
2111   vals = sbuf_a;
2112   for (i=0; i<rend-rstart; i++){
2113     row   = i + rstart;
2114     ncols = rptr[i+1] - rptr[i];
2115     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2116     vals += ncols;
2117     cols += ncols;
2118   }
2119   /* insert received matrix entries */
2120   for (imdex=0; imdex<nrecvs; imdex++){
2121     rstart = rowrange[recv_rank[imdex]];
2122     rend   = rowrange[recv_rank[imdex]+1];
2123     rptr = rbuf_j[imdex];
2124     cols = rbuf_j[imdex] + rend-rstart + 1;
2125     vals = rbuf_a[imdex];
2126     for (i=0; i<rend-rstart; i++){
2127       row   = i + rstart;
2128       ncols = rptr[i+1] - rptr[i];
2129       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2130       vals += ncols;
2131       cols += ncols;
2132     }
2133   }
2134   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2137   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %D != input mat size %D",M,mat->rmap.N);
2138   if (reuse == MAT_INITIAL_MATRIX){
2139     PetscContainer container;
2140     *matredundant = C;
2141     /* create a supporting struct and attach it to C for reuse */
2142     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2143     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2144     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2145     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2146     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2147 
2148     redund->nzlocal = nzlocal;
2149     redund->nsends  = nsends;
2150     redund->nrecvs  = nrecvs;
2151     redund->send_rank = send_rank;
2152     redund->sbuf_nz = sbuf_nz;
2153     redund->sbuf_j  = sbuf_j;
2154     redund->sbuf_a  = sbuf_a;
2155     redund->rbuf_j  = rbuf_j;
2156     redund->rbuf_a  = rbuf_a;
2157 
2158     redund->MatDestroy = C->ops->destroy;
2159     C->ops->destroy    = MatDestroy_MatRedundant;
2160   }
2161   PetscFunctionReturn(0);
2162 }
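/*
   Editor's sketch (guarded by #if 0) of the intended call sequence, assuming
   the public MatGetRedundantMatrix() wrapper mirrors this routine's argument
   list: build once with MAT_INITIAL_MATRIX, then refresh the values with
   MAT_REUSE_MATRIX while the communication buffers attached to C are reused.
*/
#if 0
Mat C = PETSC_NULL;
ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
/* ... values of mat change, nonzero pattern stays fixed ... */
ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
#endif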
2163 
2164 /* -------------------------------------------------------------------*/
2165 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2166        MatGetRow_MPIAIJ,
2167        MatRestoreRow_MPIAIJ,
2168        MatMult_MPIAIJ,
2169 /* 4*/ MatMultAdd_MPIAIJ,
2170        MatMultTranspose_MPIAIJ,
2171        MatMultTransposeAdd_MPIAIJ,
2172 #ifdef PETSC_HAVE_PBGL
2173        MatSolve_MPIAIJ,
2174 #else
2175        0,
2176 #endif
2177        0,
2178        0,
2179 /*10*/ 0,
2180        0,
2181        0,
2182        MatRelax_MPIAIJ,
2183        MatTranspose_MPIAIJ,
2184 /*15*/ MatGetInfo_MPIAIJ,
2185        MatEqual_MPIAIJ,
2186        MatGetDiagonal_MPIAIJ,
2187        MatDiagonalScale_MPIAIJ,
2188        MatNorm_MPIAIJ,
2189 /*20*/ MatAssemblyBegin_MPIAIJ,
2190        MatAssemblyEnd_MPIAIJ,
2191        0,
2192        MatSetOption_MPIAIJ,
2193        MatZeroEntries_MPIAIJ,
2194 /*25*/ MatZeroRows_MPIAIJ,
2195        0,
2196 #ifdef PETSC_HAVE_PBGL
2197        MatLUFactorNumeric_MPIAIJ,
2198 #else
2199        0,
2200 #endif
2201        0,
2202        0,
2203 /*30*/ MatSetUpPreallocation_MPIAIJ,
2204 #ifdef PETSC_HAVE_PBGL
2205        MatILUFactorSymbolic_MPIAIJ,
2206 #else
2207        0,
2208 #endif
2209        0,
2210        0,
2211        0,
2212 /*35*/ MatDuplicate_MPIAIJ,
2213        0,
2214        0,
2215        0,
2216        0,
2217 /*40*/ MatAXPY_MPIAIJ,
2218        MatGetSubMatrices_MPIAIJ,
2219        MatIncreaseOverlap_MPIAIJ,
2220        MatGetValues_MPIAIJ,
2221        MatCopy_MPIAIJ,
2222 /*45*/ 0,
2223        MatScale_MPIAIJ,
2224        0,
2225        0,
2226        0,
2227 /*50*/ MatSetBlockSize_MPIAIJ,
2228        0,
2229        0,
2230        0,
2231        0,
2232 /*55*/ MatFDColoringCreate_MPIAIJ,
2233        0,
2234        MatSetUnfactored_MPIAIJ,
2235        MatPermute_MPIAIJ,
2236        0,
2237 /*60*/ MatGetSubMatrix_MPIAIJ,
2238        MatDestroy_MPIAIJ,
2239        MatView_MPIAIJ,
2240        0,
2241        0,
2242 /*65*/ 0,
2243        0,
2244        0,
2245        0,
2246        0,
2247 /*70*/ 0,
2248        0,
2249        MatSetColoring_MPIAIJ,
2250 #if defined(PETSC_HAVE_ADIC)
2251        MatSetValuesAdic_MPIAIJ,
2252 #else
2253        0,
2254 #endif
2255        MatSetValuesAdifor_MPIAIJ,
2256 /*75*/ 0,
2257        0,
2258        0,
2259        0,
2260        0,
2261 /*80*/ 0,
2262        0,
2263        0,
2264        0,
2265 /*84*/ MatLoad_MPIAIJ,
2266        0,
2267        0,
2268        0,
2269        0,
2270        0,
2271 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2272        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2273        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2274        MatPtAP_Basic,
2275        MatPtAPSymbolic_MPIAIJ,
2276 /*95*/ MatPtAPNumeric_MPIAIJ,
2277        0,
2278        0,
2279        0,
2280        0,
2281 /*100*/0,
2282        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2283        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2284        MatConjugate_MPIAIJ,
2285        0,
2286 /*105*/MatSetValuesRow_MPIAIJ,
2287        MatRealPart_MPIAIJ,
2288        MatImaginaryPart_MPIAIJ,
2289        0,
2290        0,
2291 /*110*/0,
2292        MatGetRedundantMatrix_MPIAIJ};
2293 
2294 /* ----------------------------------------------------------------------------------------*/
2295 
2296 EXTERN_C_BEGIN
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2299 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2300 {
2301   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2302   PetscErrorCode ierr;
2303 
2304   PetscFunctionBegin;
2305   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2306   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2307   PetscFunctionReturn(0);
2308 }
2309 EXTERN_C_END
2310 
2311 EXTERN_C_BEGIN
2312 #undef __FUNCT__
2313 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2314 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2315 {
2316   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2317   PetscErrorCode ierr;
2318 
2319   PetscFunctionBegin;
2320   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2321   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 EXTERN_C_END
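/*
   Editor's sketch (guarded by #if 0): the store/retrieve cycle these two
   routines support, delegated part-wise to the SeqAIJ implementations.
   MatStoreValues() requires that no new nonzero locations be created
   between the store and the retrieve.
*/
#if 0
ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
ierr = MatStoreValues(mat);CHKERRQ(ierr);
/* ... temporarily modify the values, e.g. shift the diagonal ... */
ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* restore the stashed values */
#endif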
2325 
2326 #include "petscpc.h"
2327 EXTERN_C_BEGIN
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2330 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2331 {
2332   Mat_MPIAIJ     *b;
2333   PetscErrorCode ierr;
2334   PetscInt       i;
2335 
2336   PetscFunctionBegin;
2337   B->preallocated = PETSC_TRUE;
2338   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2339   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2340   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2341   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2342 
2343   B->rmap.bs = B->cmap.bs = 1;
2344   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2345   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2346   if (d_nnz) {
2347     for (i=0; i<B->rmap.n; i++) {
2348       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2349     }
2350   }
2351   if (o_nnz) {
2352     for (i=0; i<B->rmap.n; i++) {
2353       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2354     }
2355   }
2356   b = (Mat_MPIAIJ*)B->data;
2357 
2358   /* Explicitly create 2 MATSEQAIJ matrices. */
2359   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2360   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2361   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2362   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2363   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2364   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2365   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2366   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2367 
2368   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2369   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2370 
2371   PetscFunctionReturn(0);
2372 }
2373 EXTERN_C_END
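/*
   Editor's sketch (guarded by #if 0): typical preallocation of a parallel AIJ
   matrix using the routine above; d_nz/o_nz bound the nonzeros per row in the
   diagonal and off-diagonal blocks (3 and 1 here are illustrative values, as
   is the global size n).
*/
#if 0
ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
ierr = MatMPIAIJSetPreallocation(B,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
#endif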
2374 
2375 #undef __FUNCT__
2376 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2377 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2378 {
2379   Mat            mat;
2380   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2381   PetscErrorCode ierr;
2382 
2383   PetscFunctionBegin;
2384   *newmat       = 0;
2385   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2386   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2387   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2388   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2389   a    = (Mat_MPIAIJ*)mat->data;
2390 
2391   mat->factor       = matin->factor;
2392   mat->rmap.bs      = matin->rmap.bs;
2393   mat->assembled    = PETSC_TRUE;
2394   mat->insertmode   = NOT_SET_VALUES;
2395   mat->preallocated = PETSC_TRUE;
2396 
2397   a->size           = oldmat->size;
2398   a->rank           = oldmat->rank;
2399   a->donotstash     = oldmat->donotstash;
2400   a->roworiented    = oldmat->roworiented;
2401   a->rowindices     = 0;
2402   a->rowvalues      = 0;
2403   a->getrowactive   = PETSC_FALSE;
2404 
2405   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2406   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2407 
2408   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2409   if (oldmat->colmap) {
2410 #if defined (PETSC_USE_CTABLE)
2411     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2412 #else
2413     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2414     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2415     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2416 #endif
2417   } else a->colmap = 0;
2418   if (oldmat->garray) {
2419     PetscInt len;
2420     len  = oldmat->B->cmap.n;
2421     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2422     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2423     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2424   } else a->garray = 0;
2425 
2426   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2427   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2428   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2429   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2430   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2431   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2432   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2433   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2434   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2435   *newmat = mat;
2436   PetscFunctionReturn(0);
2437 }
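/*
   Editor's sketch (guarded by #if 0): the two duplication modes handled
   above; both copy the parallel layout, colmap/garray, and scatter context.
*/
#if 0
Mat Adup;
ierr = MatDuplicate(A,MAT_COPY_VALUES,&Adup);CHKERRQ(ierr);        /* structure and values */
ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&Adup);CHKERRQ(ierr); /* structure only */
#endif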
2438 
2439 #include "petscsys.h"
2440 
2441 #undef __FUNCT__
2442 #define __FUNCT__ "MatLoad_MPIAIJ"
2443 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2444 {
2445   Mat            A;
2446   PetscScalar    *vals,*svals;
2447   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2448   MPI_Status     status;
2449   PetscErrorCode ierr;
2450   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2451   PetscInt       i,nz,j,rstart,rend,mmax;
2452   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2453   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2454   PetscInt       cend,cstart,n,*rowners;
2455   int            fd;
2456 
2457   PetscFunctionBegin;
2458   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2459   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2460   if (!rank) {
2461     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2462     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2463     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2464   }
2465 
2466   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2467   M = header[1]; N = header[2];
2468   /* determine ownership of all rows */
2469   m    = M/size + ((M % size) > rank);
2470   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2471   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2472 
2473   /* First process needs enough room for the process with the most rows */
2474   if (!rank) {
2475     mmax       = rowners[1];
2476     for (i=2; i<=size; i++) {
2477       mmax = PetscMax(mmax,rowners[i]);
2478     }
2479   } else mmax = m;
2480 
2481   rowners[0] = 0;
2482   for (i=2; i<=size; i++) {
2483     rowners[i] += rowners[i-1];
2484   }
2485   rstart = rowners[rank];
2486   rend   = rowners[rank+1];
2487 
2488   /* distribute row lengths to all processors */
2489   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2490   if (!rank) {
2491     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2492     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2493     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2494     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2495     for (j=0; j<m; j++) {
2496       procsnz[0] += ourlens[j];
2497     }
2498     for (i=1; i<size; i++) {
2499       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2500       /* calculate the number of nonzeros on each processor */
2501       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2502         procsnz[i] += rowlengths[j];
2503       }
2504       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2505     }
2506     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2507   } else {
2508     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2509   }
2510 
2511   if (!rank) {
2512     /* determine max buffer needed and allocate it */
2513     maxnz = 0;
2514     for (i=0; i<size; i++) {
2515       maxnz = PetscMax(maxnz,procsnz[i]);
2516     }
2517     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2518 
2519     /* read in my part of the matrix column indices  */
2520     nz   = procsnz[0];
2521     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2522     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2523 
2524     /* read in everyone else's and ship them off */
2525     for (i=1; i<size; i++) {
2526       nz   = procsnz[i];
2527       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2528       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2529     }
2530     ierr = PetscFree(cols);CHKERRQ(ierr);
2531   } else {
2532     /* determine buffer space needed for message */
2533     nz = 0;
2534     for (i=0; i<m; i++) {
2535       nz += ourlens[i];
2536     }
2537     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2538 
2539     /* receive message of column indices */
2540     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2541     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2542     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2543   }
2544 
2545   /* determine column ownership if matrix is not square */
2546   if (N != M) {
2547     n      = N/size + ((N % size) > rank);
2548     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2549     cstart = cend - n;
2550   } else {
2551     cstart = rstart;
2552     cend   = rend;
2553     n      = cend - cstart;
2554   }
2555 
2556   /* loop over local rows, determining number of off diagonal entries */
2557   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2558   jj = 0;
2559   for (i=0; i<m; i++) {
2560     for (j=0; j<ourlens[i]; j++) {
2561       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2562       jj++;
2563     }
2564   }
2565 
2566   /* create our matrix */
2567   for (i=0; i<m; i++) {
2568     ourlens[i] -= offlens[i];
2569   }
2570   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2571   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2572   ierr = MatSetType(A,type);CHKERRQ(ierr);
2573   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2574 
2575   for (i=0; i<m; i++) {
2576     ourlens[i] += offlens[i];
2577   }
2578 
2579   if (!rank) {
2580     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2581 
2582     /* read in my part of the matrix numerical values  */
2583     nz   = procsnz[0];
2584     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2585 
2586     /* insert into matrix */
2587     jj      = rstart;
2588     smycols = mycols;
2589     svals   = vals;
2590     for (i=0; i<m; i++) {
2591       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2592       smycols += ourlens[i];
2593       svals   += ourlens[i];
2594       jj++;
2595     }
2596 
2597     /* read in other processors and ship out */
2598     for (i=1; i<size; i++) {
2599       nz   = procsnz[i];
2600       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2601       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2602     }
2603     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2604   } else {
2605     /* receive numeric values */
2606     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2607 
2608     /* receive message of values */
2609     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2610     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2611     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2612 
2613     /* insert into matrix */
2614     jj      = rstart;
2615     smycols = mycols;
2616     svals   = vals;
2617     for (i=0; i<m; i++) {
2618       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2619       smycols += ourlens[i];
2620       svals   += ourlens[i];
2621       jj++;
2622     }
2623   }
2624   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2625   ierr = PetscFree(vals);CHKERRQ(ierr);
2626   ierr = PetscFree(mycols);CHKERRQ(ierr);
2627   ierr = PetscFree(rowners);CHKERRQ(ierr);
2628 
2629   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2630   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2631   *newmat = A;
2632   PetscFunctionReturn(0);
2633 }
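/*
   Editor's sketch (guarded by #if 0): loading a matrix with the routine above
   through a binary viewer; the file name is illustrative.
*/
#if 0
PetscViewer viewer;
Mat         A;
ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
#endif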
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2637 /*
2638     Not great since it makes two copies of the submatrix: first a SeqAIJ
2639   locally, and then the end result by concatenating the local matrices.
2640   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2641 */
2642 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2643 {
2644   PetscErrorCode ierr;
2645   PetscMPIInt    rank,size;
2646   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2647   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2648   Mat            *local,M,Mreuse;
2649   PetscScalar    *vwork,*aa;
2650   MPI_Comm       comm = mat->comm;
2651   Mat_SeqAIJ     *aij;
2652 
2653 
2654   PetscFunctionBegin;
2655   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2656   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2657 
2658   if (call ==  MAT_REUSE_MATRIX) {
2659     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2660     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2661     local = &Mreuse;
2662     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2663   } else {
2664     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2665     Mreuse = *local;
2666     ierr   = PetscFree(local);CHKERRQ(ierr);
2667   }
2668 
2669   /*
2670       m - number of local rows
2671       n - number of columns (same on all processors)
2672       rstart - first row in new global matrix generated
2673   */
2674   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2675   if (call == MAT_INITIAL_MATRIX) {
2676     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2677     ii  = aij->i;
2678     jj  = aij->j;
2679 
2680     /*
2681         Determine the number of non-zeros in the diagonal and off-diagonal
2682         portions of the matrix in order to do correct preallocation
2683     */
2684 
2685     /* first get start and end of "diagonal" columns */
2686     if (csize == PETSC_DECIDE) {
2687       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2688       if (mglobal == n) { /* square matrix */
2689         nlocal = m;
2690       } else {
2691         nlocal = n/size + ((n % size) > rank);
2692       }
2693     } else {
2694       nlocal = csize;
2695     }
2696     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2697     rstart = rend - nlocal;
2698     if (rank == size - 1 && rend != n) {
2699       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2700     }
2701 
2702     /* next, compute all the lengths */
2703     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2704     olens = dlens + m;
2705     for (i=0; i<m; i++) {
2706       jend = ii[i+1] - ii[i];
2707       olen = 0;
2708       dlen = 0;
2709       for (j=0; j<jend; j++) {
2710         if (*jj < rstart || *jj >= rend) olen++;
2711         else dlen++;
2712         jj++;
2713       }
2714       olens[i] = olen;
2715       dlens[i] = dlen;
2716     }
2717     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2718     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2719     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2720     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2721     ierr = PetscFree(dlens);CHKERRQ(ierr);
2722   } else {
2723     PetscInt ml,nl;
2724 
2725     M = *newmat;
2726     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2727     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2728     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2729     /*
2730          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2731        rather than the slower MatSetValues().
2732     */
2733     M->was_assembled = PETSC_TRUE;
2734     M->assembled     = PETSC_FALSE;
2735   }
2736   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2737   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2738   ii  = aij->i;
2739   jj  = aij->j;
2740   aa  = aij->a;
2741   for (i=0; i<m; i++) {
2742     row   = rstart + i;
2743     nz    = ii[i+1] - ii[i];
2744     cwork = jj;     jj += nz;
2745     vwork = aa;     aa += nz;
2746     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2747   }
2748 
2749   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2750   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2751   *newmat = M;
2752 
2753   /* save submatrix used in processor for next request */
2754   if (call ==  MAT_INITIAL_MATRIX) {
2755     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2756     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2757   }
2758 
2759   PetscFunctionReturn(0);
2760 }
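/*
   Editor's sketch (guarded by #if 0): extracting a parallel submatrix with
   the routine above; nloc and first are illustrative. A first call with
   MAT_INITIAL_MATRIX caches the sequential submatrix on the result so a
   later MAT_REUSE_MATRIX call with the same index sets can refill it.
*/
#if 0
IS  isrow,iscol;
Mat sub;
ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&isrow);CHKERRQ(ierr);
ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&iscol);CHKERRQ(ierr);
ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
/* ... values of A change, same index sets and layout ... */
ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
#endif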
2761 
2762 EXTERN_C_BEGIN
2763 #undef __FUNCT__
2764 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2765 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2766 {
2767   PetscInt       m,cstart, cend,j,nnz,i,d;
2768   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2769   const PetscInt *JJ;
2770   PetscScalar    *values;
2771   PetscErrorCode ierr;
2772 
2773   PetscFunctionBegin;
2774   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2775 
2776   B->rmap.bs = B->cmap.bs = 1;
2777   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2778   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2779   m      = B->rmap.n;
2780   cstart = B->cmap.rstart;
2781   cend   = B->cmap.rend;
2782   rstart = B->rmap.rstart;
2783 
2784   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2785   o_nnz = d_nnz + m;
2786 
2787   for (i=0; i<m; i++) {
2788     nnz     = Ii[i+1]- Ii[i];
2789     JJ      = J + Ii[i];
2790     nnz_max = PetscMax(nnz_max,nnz);
2791     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2792     for (j=0; j<nnz; j++) {
2793       if (*JJ >= cstart) break;
2794       JJ++;
2795     }
2796     d = 0;
2797     for (; j<nnz; j++) {
2798       if (*JJ++ >= cend) break;
2799       d++;
2800     }
2801     d_nnz[i] = d;
2802     o_nnz[i] = nnz - d;
2803   }
2804   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2805   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2806 
2807   if (v) values = (PetscScalar*)v;
2808   else {
2809     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2810     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2811   }
2812 
2813   for (i=0; i<m; i++) {
2814     ii   = i + rstart;
2815     nnz  = Ii[i+1]- Ii[i];
2816     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2817   }
2818   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2819   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2820 
2821   if (!v) {
2822     ierr = PetscFree(values);CHKERRQ(ierr);
2823   }
2824   PetscFunctionReturn(0);
2825 }
2826 EXTERN_C_END
2827 
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2830 /*@
2831    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2832    (the default parallel PETSc format).
2833 
2834    Collective on MPI_Comm
2835 
2836    Input Parameters:
2837 +  B - the matrix
2838 .  i - the indices into j for the start of each local row (starts with zero)
2839 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2840 -  v - optional values in the matrix
2841 
2842    Level: developer
2843 
2844    Notes: this actually copies the values from i[], j[], and v[] into PETSc's internal
2845      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2846 
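   A minimal calling sketch (hypothetical CSR data for two local rows; error
   checking omitted):
.vb
      PetscInt    i[] = {0,2,4};           /* row r occupies j[i[r]] through j[i[r+1]-1] */
      PetscInt    j[] = {0,1,1,2};         /* global column indices, sorted within each row */
      PetscScalar v[] = {1.0,2.0,3.0,4.0};
      MatCreate(comm,&B);
      MatSetSizes(B,2,2,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
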
2847 .keywords: matrix, aij, compressed row, sparse, parallel
2848 
2849 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2850           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2851 @*/
2852 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2853 {
2854   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2855 
2856   PetscFunctionBegin;
2857   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2858   if (f) {
2859     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2860   }
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 #undef __FUNCT__
2865 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2866 /*@C
2867    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2868    (the default parallel PETSc format).  For good matrix assembly performance
2869    the user should preallocate the matrix storage by setting the parameters
2870    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2871    performance can be increased by more than a factor of 50.
2872 
2873    Collective on MPI_Comm
2874 
2875    Input Parameters:
2876 +  B - the matrix
2877 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2878            (same value is used for all local rows)
2879 .  d_nnz - array containing the number of nonzeros in the various rows of the
2880            DIAGONAL portion of the local submatrix (possibly different for each row)
2881            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2882            The size of this array is equal to the number of local rows, i.e. 'm'.
2883            You must leave room for the diagonal entry even if it is zero.
2884 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2885            submatrix (same value is used for all local rows).
2886 -  o_nnz - array containing the number of nonzeros in the various rows of the
2887            OFF-DIAGONAL portion of the local submatrix (possibly different for
2888            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2889            structure. The size of this array is equal to the number
2890            of local rows, i.e. 'm'.
2891 
2892    If the *_nnz parameter is given then the *_nz parameter is ignored
2893 
2894    The AIJ format (also called the Yale sparse matrix format or
2895    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2896    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2897 
2898    The parallel matrix is partitioned such that the first m0 rows belong to
2899    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2900    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2901 
2902    The DIAGONAL portion of the local submatrix of a processor can be defined
2903    as the submatrix which is obtained by extracting the part corresponding
2904    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2905    first row that belongs to the processor, and r2 is the last row belonging
2906    to this processor. This is a square mxm matrix. The remaining portion of
2907    the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
2908 
2909    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2910 
2911    Example usage:
2912 
2913    Consider the following 8x8 matrix with 34 nonzero values, that is
2914    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2915    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2916    as follows:
2917 
2918 .vb
2919             1  2  0  |  0  3  0  |  0  4
2920     Proc0   0  5  6  |  7  0  0  |  8  0
2921             9  0 10  | 11  0  0  | 12  0
2922     -------------------------------------
2923            13  0 14  | 15 16 17  |  0  0
2924     Proc1   0 18  0  | 19 20 21  |  0  0
2925             0  0  0  | 22 23  0  | 24  0
2926     -------------------------------------
2927     Proc2  25 26 27  |  0  0 28  | 29  0
2928            30  0  0  | 31 32 33  |  0 34
2929 .ve
2930 
2931    This can be represented as a collection of submatrices as:
2932 
2933 .vb
2934       A B C
2935       D E F
2936       G H I
2937 .ve
2938 
2939    Where the submatrices A,B,C are owned by proc0, D,E,F are
2940    owned by proc1, G,H,I are owned by proc2.
2941 
2942    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2943    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2944    The 'M','N' parameters are 8,8, and have the same values on all procs.
2945 
2946    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2947    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2948    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2949    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2950    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
2951    matrix and [DF] as another SeqAIJ matrix.
2952 
2953    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2954    allocated for every row of the local DIAGONAL submatrix, and o_nz
2955    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2956    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2957    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2958    In this case, the values of d_nz,o_nz are:
2959 .vb
2960      proc0 : d_nz = 2, o_nz = 2
2961      proc1 : d_nz = 3, o_nz = 2
2962      proc2 : d_nz = 1, o_nz = 4
2963 .ve
2964    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2965    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2966    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2967    34 values.
2968 
2969    When d_nnz, o_nnz parameters are specified, the storage is specified
2970    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2971    In the above case the values for d_nnz,o_nnz are:
2972 .vb
2973      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2974      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2975      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2976 .ve
2977    Here the space allocated is the sum of all the above values, i.e. 34, and
2978    hence the preallocation is exact.
2979 
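   For instance, with the d_nnz/o_nnz values above, proc1 could preallocate as
   follows (a sketch; error checking omitted):
.vb
      PetscInt d_nnz[] = {3,3,2};
      PetscInt o_nnz[] = {2,1,1};
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
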
2980    Level: intermediate
2981 
2982 .keywords: matrix, aij, compressed row, sparse, parallel
2983 
2984 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2985           MPIAIJ
2986 @*/
2987 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2988 {
2989   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2990 
2991   PetscFunctionBegin;
2992   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2993   if (f) {
2994     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2995   }
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 #undef __FUNCT__
3000 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3001 /*@C
3002      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3003          CSR format the local rows.
3004 
3005    Collective on MPI_Comm
3006 
3007    Input Parameters:
3008 +  comm - MPI communicator
3009 .  m - number of local rows (Cannot be PETSC_DECIDE)
3010 .  n - This value should be the same as the local size used in creating the
3011        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3012        calculated if N is given). For square matrices n is almost always m.
3013 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3014 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3015 .   i - row indices
3016 .   j - column indices
3017 -   a - matrix values
3018 
3019    Output Parameter:
3020 .   mat - the matrix
3021 
3022    Level: intermediate
3023 
3024    Notes:
3025        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3026      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3027      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3028 
3029        The i and j indices are 0 based
3030 
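   A minimal sketch (a hypothetical process contributing one local row with two
   entries of an 8-column matrix; error checking omitted):
.vb
      PetscInt    i[] = {0,2};
      PetscInt    j[] = {0,5};
      PetscScalar a[] = {1.0,-1.0};
      MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,PETSC_DETERMINE,8,i,j,a,&mat);
.ve
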
3031 .keywords: matrix, aij, compressed row, sparse, parallel
3032 
3033 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3034           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3035 @*/
3036 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3037 {
3038   PetscErrorCode ierr;
3039 
3040   PetscFunctionBegin;
3041   if (i[0]) {
3042     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3043   }
3044   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3045   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3046   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3047   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 #undef __FUNCT__
3052 #define __FUNCT__ "MatCreateMPIAIJ"
3053 /*@C
3054    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3055    (the default parallel PETSc format).  For good matrix assembly performance
3056    the user should preallocate the matrix storage by setting the parameters
3057    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3058    performance can be increased by more than a factor of 50.
3059 
3060    Collective on MPI_Comm
3061 
3062    Input Parameters:
3063 +  comm - MPI communicator
3064 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3065            This value should be the same as the local size used in creating the
3066            y vector for the matrix-vector product y = Ax.
3067 .  n - This value should be the same as the local size used in creating the
3068        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3069        calculated if N is given). For square matrices n is almost always m.
3070 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3071 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3072 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3073            (same value is used for all local rows)
3074 .  d_nnz - array containing the number of nonzeros in the various rows of the
3075            DIAGONAL portion of the local submatrix (possibly different for each row)
3076            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3077            The size of this array is equal to the number of local rows, i.e. 'm'.
3078            You must leave room for the diagonal entry even if it is zero.
3079 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3080            submatrix (same value is used for all local rows).
3081 -  o_nnz - array containing the number of nonzeros in the various rows of the
3082            OFF-DIAGONAL portion of the local submatrix (possibly different for
3083            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3084            structure. The size of this array is equal to the number
3085            of local rows, i.e. 'm'.
3086 
3087    Output Parameter:
3088 .  A - the matrix
3089 
3090    Notes:
3091    If the *_nnz parameter is given then the *_nz parameter is ignored
3092 
3093    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3094    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3095    storage requirements for this matrix.
3096 
3097    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3098    processor then it must be used on all processors that share the object for
3099    that argument.
3100 
3101    The user MUST specify either the local or global matrix dimensions
3102    (possibly both).
3103 
3104    The parallel matrix is partitioned across processors such that the
3105    first m0 rows belong to process 0, the next m1 rows belong to
3106    process 1, the next m2 rows belong to process 2, etc., where
3107    m0,m1,m2,... are given by the input parameter 'm'; i.e. each processor stores
3108    values corresponding to an [m x N] submatrix.
3109 
3110    The columns are logically partitioned with the n0 columns belonging
3111    to the 0th partition, the next n1 columns belonging to the next
3112    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3113 
3114    The DIAGONAL portion of the local submatrix on any given processor
3115    is the submatrix corresponding to the rows and columns m,n owned by
3116    the given processor, i.e. the diagonal submatrix on
3117    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
3118    etc. The remaining portion of the local submatrix [m x (N-n)]
3119    constitutes the OFF-DIAGONAL portion. The example below better
3120    illustrates this concept.
3121 
3122    For a square global matrix we define each processor's diagonal portion
3123    to be its local rows and the corresponding columns (a square submatrix);
3124    each processor's off-diagonal portion encompasses the remainder of the
3125    local matrix (a rectangular submatrix).
3126 
3127    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3128 
3129    When calling this routine with a single process communicator, a matrix of
3130    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3131    type of communicator, use the construction mechanism:
3132      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3133 
3134    By default, this format uses inodes (identical nodes) when possible.
3135    We search for consecutive rows with the same nonzero structure, thereby
3136    reusing matrix information to achieve increased efficiency.
3137 
3138    Options Database Keys:
3139 +  -mat_no_inode  - Do not use inodes
3140 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3141 -  -mat_aij_oneindex - Internally use indexing starting at 1
3142         rather than 0.  Note that when calling MatSetValues(),
3143         the user still MUST index entries starting at 0!
3144 
3145 
3146    Example usage:
3147 
3148    Consider the following 8x8 matrix with 34 nonzero values, that is
3149    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3150    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3151    as follows:
3152 
3153 .vb
3154             1  2  0  |  0  3  0  |  0  4
3155     Proc0   0  5  6  |  7  0  0  |  8  0
3156             9  0 10  | 11  0  0  | 12  0
3157     -------------------------------------
3158            13  0 14  | 15 16 17  |  0  0
3159     Proc1   0 18  0  | 19 20 21  |  0  0
3160             0  0  0  | 22 23  0  | 24  0
3161     -------------------------------------
3162     Proc2  25 26 27  |  0  0 28  | 29  0
3163            30  0  0  | 31 32 33  |  0 34
3164 .ve
3165 
3166    This can be represented as a collection of submatrices as:
3167 
3168 .vb
3169       A B C
3170       D E F
3171       G H I
3172 .ve
3173 
3174    Where the submatrices A,B,C are owned by proc0, D,E,F are
3175    owned by proc1, G,H,I are owned by proc2.
3176 
3177    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3178    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3179    The 'M','N' parameters are 8,8, and have the same values on all procs.
3180 
3181    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3182    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3183    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3184    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3185    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3186    matrix and [DF] as another SeqAIJ matrix.
3187 
3188    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3189    allocated for every row of the local DIAGONAL submatrix, and o_nz
3190    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3191    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3192    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3193    In this case, the values of d_nz,o_nz are:
3194 .vb
3195      proc0 : d_nz = 2, o_nz = 2
3196      proc1 : d_nz = 3, o_nz = 2
3197      proc2 : d_nz = 1, o_nz = 4
3198 .ve
3199    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3200    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3201    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3202    34 values.
3203 
3204    When d_nnz, o_nnz parameters are specified, the storage is specified
3205    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3206    In the above case the values for d_nnz,o_nnz are:
3207 .vb
3208      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3209      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3210      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3211 .ve
3212    Here the space allocated is the sum of all the above values, i.e. 34, and
3213    hence the preallocation is exact.
3214 
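   For instance, proc0 in the example above could create its share of the matrix as
   follows (a sketch; error checking omitted):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
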
3215    Level: intermediate
3216 
3217 .keywords: matrix, aij, compressed row, sparse, parallel
3218 
3219 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3220           MPIAIJ, MatCreateMPIAIJWithArrays()
3221 @*/
3222 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3223 {
3224   PetscErrorCode ierr;
3225   PetscMPIInt    size;
3226 
3227   PetscFunctionBegin;
3228   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3229   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3230   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3231   if (size > 1) {
3232     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3233     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3234   } else {
3235     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3236     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3237   }
3238   PetscFunctionReturn(0);
3239 }
3240 
3241 #undef __FUNCT__
3242 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
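/*
   MatMPIAIJGetSeqAIJ - gives access to the diagonal (Ad) and off-diagonal (Ao)
   SeqAIJ blocks of an MPIAIJ matrix, and to the garray map from the local column
   numbers of Ao to global column numbers.
*/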
3243 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3244 {
3245   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3246 
3247   PetscFunctionBegin;
3248   *Ad     = a->A;
3249   *Ao     = a->B;
3250   *colmap = a->garray;
3251   PetscFunctionReturn(0);
3252 }
3253 
3254 #undef __FUNCT__
3255 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3256 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3257 {
3258   PetscErrorCode ierr;
3259   PetscInt       i;
3260   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3261 
3262   PetscFunctionBegin;
3263   if (coloring->ctype == IS_COLORING_GLOBAL) {
3264     ISColoringValue *allcolors,*colors;
3265     ISColoring      ocoloring;
3266 
3267     /* set coloring for diagonal portion */
3268     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3269 
3270     /* set coloring for off-diagonal portion */
3271     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3272     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3273     for (i=0; i<a->B->cmap.n; i++) {
3274       colors[i] = allcolors[a->garray[i]];
3275     }
3276     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3277     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3278     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3279     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3280   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3281     ISColoringValue *colors;
3282     PetscInt        *larray;
3283     ISColoring      ocoloring;
3284 
3285     /* set coloring for diagonal portion */
3286     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3287     for (i=0; i<a->A->cmap.n; i++) {
3288       larray[i] = i + A->cmap.rstart;
3289     }
3290     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3291     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3292     for (i=0; i<a->A->cmap.n; i++) {
3293       colors[i] = coloring->colors[larray[i]];
3294     }
3295     ierr = PetscFree(larray);CHKERRQ(ierr);
3296     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3297     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3298     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3299 
3300     /* set coloring for off-diagonal portion */
3301     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3302     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3303     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3304     for (i=0; i<a->B->cmap.n; i++) {
3305       colors[i] = coloring->colors[larray[i]];
3306     }
3307     ierr = PetscFree(larray);CHKERRQ(ierr);
3308     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3309     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3310     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3311   } else {
3312     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3313   }
3314 
3315   PetscFunctionReturn(0);
3316 }
3317 
3318 #if defined(PETSC_HAVE_ADIC)
3319 #undef __FUNCT__
3320 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3321 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3322 {
3323   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3324   PetscErrorCode ierr;
3325 
3326   PetscFunctionBegin;
3327   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3328   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3329   PetscFunctionReturn(0);
3330 }
3331 #endif
3332 
3333 #undef __FUNCT__
3334 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3335 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3336 {
3337   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3338   PetscErrorCode ierr;
3339 
3340   PetscFunctionBegin;
3341   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3342   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3343   PetscFunctionReturn(0);
3344 }
3345 
3346 #undef __FUNCT__
3347 #define __FUNCT__ "MatMerge"
3348 /*@C
3349       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3350                  matrices supplied by each processor
3351 
3352     Collective on MPI_Comm
3353 
3354    Input Parameters:
3355 +    comm - the communicator the parallel matrix will live on
3356 .    inmat - the input sequential matrix (one per process)
3357 .    n - number of local columns (or PETSC_DECIDE)
3358 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3359 
3360    Output Parameter:
3361 .    outmat - the parallel matrix generated
3362 
3363     Level: advanced
3364 
3365    Notes: The number of columns of the matrix in EACH processor MUST be the same. The input matrix inmat is destroyed once its values have been copied into outmat.
3366 
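   A usage sketch (A_seq is this process's sequential matrix; note that it is
   destroyed by the call):
.vb
      MatMerge(comm,A_seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A_mpi);
.ve
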
3367 @*/
3368 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3369 {
3370   PetscErrorCode ierr;
3371   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3372   PetscInt       *indx;
3373   PetscScalar    *values;
3374 
3375   PetscFunctionBegin;
3376   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3377   if (scall == MAT_INITIAL_MATRIX){
3378     /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
3379     if (n == PETSC_DECIDE){
3380       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3381     }
3382     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3383     rstart -= m;
3384 
3385     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3386     for (i=0;i<m;i++) {
3387       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3388       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3389       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3390     }
3391     /* This routine will ONLY return MPIAIJ type matrix */
3392     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3393     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3394     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3395     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3396     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3397 
3398   } else if (scall == MAT_REUSE_MATRIX){
3399     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3400   } else {
3401     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3402   }
3403 
3404   for (i=0;i<m;i++) {
3405     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3406     Ii    = i + rstart;
3407     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3408     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3409   }
3410   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3411   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3412   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3413 
3414   PetscFunctionReturn(0);
3415 }
3416 
3417 #undef __FUNCT__
3418 #define __FUNCT__ "MatFileSplit"
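/*
   MatFileSplit - writes each process's local rows of A as a separate sequential
   matrix in the binary file <outfile>.<rank>.
*/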
3419 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3420 {
3421   PetscErrorCode    ierr;
3422   PetscMPIInt       rank;
3423   PetscInt          m,N,i,rstart,nnz;
3424   size_t            len;
3425   const PetscInt    *indx;
3426   PetscViewer       out;
3427   char              *name;
3428   Mat               B;
3429   const PetscScalar *values;
3430 
3431   PetscFunctionBegin;
3432   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3433   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3434   /* Should this be the type of the diagonal block of A? */
3435   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3436   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3437   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3438   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3439   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3440   for (i=0;i<m;i++) {
3441     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3442     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3443     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3444   }
3445   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3446   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3447 
3448   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3449   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3450   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3451   sprintf(name,"%s.%d",outfile,rank);
3452   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3453   ierr = PetscFree(name);CHKERRQ(ierr);
3454   ierr = MatView(B,out);CHKERRQ(ierr);
3455   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3456   ierr = MatDestroy(B);CHKERRQ(ierr);
3457   PetscFunctionReturn(0);
3458 }
3459 
3460 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3461 #undef __FUNCT__
3462 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3463 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3464 {
3465   PetscErrorCode       ierr;
3466   Mat_Merge_SeqsToMPI  *merge;
3467   PetscContainer       container;
3468 
3469   PetscFunctionBegin;
3470   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3471   if (container) {
3472     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3473     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3474     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3475     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3476     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3477     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3478     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3479     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3480     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3481     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3482     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3483     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3484 
3485     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3486     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3487     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container exists */
3488   }
3489 
3490   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 #include "src/mat/utils/freespace.h"
3495 #include "petscbt.h"
3496 static PetscEvent logkey_seqstompinum = 0;
3497 #undef __FUNCT__
3498 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3499 /*@C
3500       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
3501                  matrices supplied by each processor
3502 
3503     Collective on MPI_Comm
3504 
3505    Input Parameters:
3506 +    comm - the communicator the parallel matrix will live on
3507 .    seqmat - the input sequential matrix (one per process)
3508 .    m - number of local rows (or PETSC_DECIDE)
3509 .    n - number of local columns (or PETSC_DECIDE)
3510 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3511 
3512    Output Parameter:
3513 .    mpimat - the parallel matrix generated
3514 
3515     Level: advanced
3516 
3517    Notes:
3518      The dimensions of the sequential matrix in each processor MUST be the same.
3519      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3520      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
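
   A reuse sketch (a hypothetical loop in which the nonzero pattern of seqmat is
   fixed but its values change between iterations):
.vb
      MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);
      /* ... update the values (but not the pattern) of seqmat ... */
      MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);
.ve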
3521 @*/
3522 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3523 {
3524   PetscErrorCode       ierr;
3525   MPI_Comm             comm=mpimat->comm;
3526   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3527   PetscMPIInt          size,rank,taga,*len_s;
3528   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3529   PetscInt             proc,m;
3530   PetscInt             **buf_ri,**buf_rj;
3531   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3532   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3533   MPI_Request          *s_waits,*r_waits;
3534   MPI_Status           *status;
3535   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3536   Mat_Merge_SeqsToMPI  *merge;
3537   PetscContainer       container;
3538 
3539   PetscFunctionBegin;
3540   if (!logkey_seqstompinum) {
3541     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
3542   }
3543   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3544 
3545   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3546   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3547 
3548   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3549   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
3550   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3552   bi     = merge->bi;
3553   bj     = merge->bj;
3554   buf_ri = merge->buf_ri;
3555   buf_rj = merge->buf_rj;
3556 
3557   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3558   owners = merge->rowmap.range;
3559   len_s  = merge->len_s;
3560 
3561   /* send and recv matrix values */
3562   /*-----------------------------*/
3563   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3564   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3565 
3566   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3567   for (proc=0,k=0; proc<size; proc++){
3568     if (!len_s[proc]) continue;
3569     i = owners[proc];
3570     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3571     k++;
3572   }
3573 
3574   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3575   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3576   ierr = PetscFree(status);CHKERRQ(ierr);
3577 
3578   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3579   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3580 
3581   /* insert mat values of mpimat */
3582   /*----------------------------*/
3583   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3584   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3585   nextrow = buf_ri_k + merge->nrecv;
3586   nextai  = nextrow + merge->nrecv;
3587 
3588   for (k=0; k<merge->nrecv; k++){
3589     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3590     nrows = *(buf_ri_k[k]);
3591     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3592     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3593   }
3594 
3595   /* set values of ba */
3596   m = merge->rowmap.n;
3597   for (i=0; i<m; i++) {
3598     arow = owners[rank] + i;
3599     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3600     bnzi = bi[i+1] - bi[i];
3601     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3602 
3603     /* add local non-zero vals of this proc's seqmat into ba */
3604     anzi = ai[arow+1] - ai[arow];
3605     aj   = a->j + ai[arow];
3606     aa   = a->a + ai[arow];
3607     nextaj = 0;
3608     for (j=0; nextaj<anzi; j++){
3609       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3610         ba_i[j] += aa[nextaj++];
3611       }
3612     }
3613 
3614     /* add received vals into ba */
3615     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3616       /* i-th row */
3617       if (i == *nextrow[k]) {
3618         anzi = *(nextai[k]+1) - *nextai[k];
3619         aj   = buf_rj[k] + *(nextai[k]);
3620         aa   = abuf_r[k] + *(nextai[k]);
3621         nextaj = 0;
3622         for (j=0; nextaj<anzi; j++){
3623           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3624             ba_i[j] += aa[nextaj++];
3625           }
3626         }
3627         nextrow[k]++; nextai[k]++;
3628       }
3629     }
3630     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3631   }
3632   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3634 
3635   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3636   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3637   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3638   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3639   PetscFunctionReturn(0);
3640 }
3641 
3642 static PetscEvent logkey_seqstompisym = 0;
3643 #undef __FUNCT__
3644 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
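/*
   MatMerge_SeqsToMPISymbolic - determines the nonzero structure of the merged
   parallel matrix (the merge->bi/bj arrays and the preallocation);
   MatMerge_SeqsToMPINumeric() later fills it with values. The supporting
   Mat_Merge_SeqsToMPI structure is attached to the result for reuse.
*/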
3645 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3646 {
3647   PetscErrorCode       ierr;
3648   Mat                  B_mpi;
3649   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3650   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3651   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3652   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3653   PetscInt             len,proc,*dnz,*onz;
3654   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3655   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3656   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3657   MPI_Status           *status;
3658   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3659   PetscBT              lnkbt;
3660   Mat_Merge_SeqsToMPI  *merge;
3661   PetscContainer       container;
3662 
3663   PetscFunctionBegin;
3664   if (!logkey_seqstompisym) {
3665     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3666   }
3667   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3668 
3669   /* make sure it is a PETSc comm */
3670   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3671   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3672   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3673 
3674   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3675   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3676 
3677   /* determine row ownership */
3678   /*---------------------------------------------------------*/
3679   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3680   merge->rowmap.n = m;
3681   merge->rowmap.N = M;
3682   merge->rowmap.bs = 1;
3683   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3684   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3685   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3686 
3687   m      = merge->rowmap.n;
3688   M      = merge->rowmap.N;
3689   owners = merge->rowmap.range;
3690 
3691   /* determine the number of messages to send, their lengths */
3692   /*---------------------------------------------------------*/
3693   len_s  = merge->len_s;
3694 
3695   len = 0;  /* length of buf_si[] */
3696   merge->nsend = 0;
3697   for (proc=0; proc<size; proc++){
3698     len_si[proc] = 0;
3699     if (proc == rank){
3700       len_s[proc] = 0;
3701     } else {
3702       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3703       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
3704     }
3705     if (len_s[proc]) {
3706       merge->nsend++;
3707       nrows = 0;
3708       for (i=owners[proc]; i<owners[proc+1]; i++){
3709         if (ai[i+1] > ai[i]) nrows++;
3710       }
3711       len_si[proc] = 2*(nrows+1);
3712       len += len_si[proc];
3713     }
3714   }
3715 
3716   /* determine the number and length of messages to receive for ij-structure */
3717   /*-------------------------------------------------------------------------*/
3718   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3719   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3720 
3721   /* post the Irecv of j-structure */
3722   /*-------------------------------*/
3723   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3724   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3725 
3726   /* post the Isend of j-structure */
3727   /*--------------------------------*/
3728   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3729   sj_waits = si_waits + merge->nsend;
3730 
3731   for (proc=0, k=0; proc<size; proc++){
3732     if (!len_s[proc]) continue;
3733     i = owners[proc];
3734     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3735     k++;
3736   }
3737 
3738   /* receives and sends of j-structure are complete */
3739   /*------------------------------------------------*/
3740   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3741   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3742 
3743   /* send and recv i-structure */
3744   /*---------------------------*/
3745   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3746   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3747 
3748   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3749   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3750   for (proc=0,k=0; proc<size; proc++){
3751     if (!len_s[proc]) continue;
3752     /* form outgoing message for i-structure:
3753          buf_si[0]:                 nrows to be sent
3754                [1:nrows]:           row index (global)
3755                [nrows+1:2*nrows+1]: i-structure index
3756     */
3757     /*-------------------------------------------*/
3758     nrows = len_si[proc]/2 - 1;
3759     buf_si_i    = buf_si + nrows+1;
3760     buf_si[0]   = nrows;
3761     buf_si_i[0] = 0;
3762     nrows = 0;
3763     for (i=owners[proc]; i<owners[proc+1]; i++){
3764       anzi = ai[i+1] - ai[i];
3765       if (anzi) {
3766         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3767         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3768         nrows++;
3769       }
3770     }
3771     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3772     k++;
3773     buf_si += len_si[proc];
3774   }
3775 
3776   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3777   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3778 
3779   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3780   for (i=0; i<merge->nrecv; i++){
3781     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3782   }
3783 
3784   ierr = PetscFree(len_si);CHKERRQ(ierr);
3785   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3786   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3787   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3788   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3789   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3790   ierr = PetscFree(status);CHKERRQ(ierr);
3791 
3792   /* compute a local seq matrix in each processor */
3793   /*----------------------------------------------*/
3794   /* allocate bi array and free space for accumulating nonzero column info */
3795   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3796   bi[0] = 0;
3797 
3798   /* create and initialize a linked list */
3799   nlnk = N+1;
3800   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3801 
3802   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3803   len  = ai[owners[rank+1]] - ai[owners[rank]];
3805   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3806   current_space = free_space;
3807 
3808   /* determine symbolic info for each local row */
3809   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3810   nextrow = buf_ri_k + merge->nrecv;
3811   nextai  = nextrow + merge->nrecv;
3812   for (k=0; k<merge->nrecv; k++){
3813     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3814     nrows = *buf_ri_k[k];
3815     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3816     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3817   }
3818 
3819   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3820   len = 0;
3821   for (i=0;i<m;i++) {
3822     bnzi   = 0;
3823     /* add local non-zero cols of this proc's seqmat into lnk */
3824     arow   = owners[rank] + i;
3825     anzi   = ai[arow+1] - ai[arow];
3826     aj     = a->j + ai[arow];
3827     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3828     bnzi += nlnk;
3829     /* add received col data into lnk */
3830     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3831       if (i == *nextrow[k]) { /* i-th row */
3832         anzi = *(nextai[k]+1) - *nextai[k];
3833         aj   = buf_rj[k] + *nextai[k];
3834         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3835         bnzi += nlnk;
3836         nextrow[k]++; nextai[k]++;
3837       }
3838     }
3839     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3840 
3841     /* if free space is not available, make more free space */
3842     if (current_space->local_remaining<bnzi) {
3843       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3844       nspacedouble++;
3845     }
3846     /* copy data into free space, then initialize lnk */
3847     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3848     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3849 
3850     current_space->array           += bnzi;
3851     current_space->local_used      += bnzi;
3852     current_space->local_remaining -= bnzi;
3853 
3854     bi[i+1] = bi[i] + bnzi;
3855   }
3856 
3857   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3858 
3859   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3860   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3861   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3862 
3863   /* create symbolic parallel matrix B_mpi */
3864   /*---------------------------------------*/
3865   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3866   if (n==PETSC_DECIDE) {
3867     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3868   } else {
3869     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3870   }
3871   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3872   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3873   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3874 
3875   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3876   B_mpi->assembled     = PETSC_FALSE;
3877   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3878   merge->bi            = bi;
3879   merge->bj            = bj;
3880   merge->buf_ri        = buf_ri;
3881   merge->buf_rj        = buf_rj;
3882   merge->coi           = PETSC_NULL;
3883   merge->coj           = PETSC_NULL;
3884   merge->owners_co     = PETSC_NULL;
3885 
3886   /* attach the supporting struct to B_mpi for reuse */
3887   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3888   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3889   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3890   *mpimat = B_mpi;
3891 
3892   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3893   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3894   PetscFunctionReturn(0);
3895 }
3896 
3897 static PetscEvent logkey_seqstompi = 0;
3898 #undef __FUNCT__
3899 #define __FUNCT__ "MatMerge_SeqsToMPI"
3900 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3901 {
3902   PetscErrorCode   ierr;
3903 
3904   PetscFunctionBegin;
3905   if (!logkey_seqstompi) {
3906     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3907   }
3908   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3909   if (scall == MAT_INITIAL_MATRIX){
3910     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3911   }
3912   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3913   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3914   PetscFunctionReturn(0);
3915 }
3916 static PetscEvent logkey_getlocalmat = 0;
3917 #undef __FUNCT__
3918 #define __FUNCT__ "MatGetLocalMat"
3919 /*@C
3920      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A
3921 
3922     Not Collective
3923 
3924    Input Parameters:
3925 +    A - the matrix
3926 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3927 
3928    Output Parameter:
3929 .    A_loc - the local sequential matrix generated
3930 
3931     Level: developer
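   A usage sketch (the first call builds A_loc; a later call refreshes its values
   in place):
.vb
      MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      /* ... the numerical values of A change ... */
      MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
.ve
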
3932 
3933 @*/
3934 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3935 {
3936   PetscErrorCode  ierr;
3937   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3938   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3939   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3940   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3941   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3942   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3943 
3944   PetscFunctionBegin;
3945   if (!logkey_getlocalmat) {
3946     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3947   }
3948   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3949   if (scall == MAT_INITIAL_MATRIX){
3950     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3951     ci[0] = 0;
3952     for (i=0; i<am; i++){
3953       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3954     }
3955     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3956     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3957     k = 0;
3958     for (i=0; i<am; i++) {
3959       ncols_o = bi[i+1] - bi[i];
3960       ncols_d = ai[i+1] - ai[i];
3961       /* off-diagonal portion of A (global columns < cstart) */
3962       for (jo=0; jo<ncols_o; jo++) {
3963         col = cmap[*bj];
3964         if (col >= cstart) break;
3965         cj[k]   = col; bj++;
3966         ca[k++] = *ba++;
3967       }
3968       /* diagonal portion of A */
3969       for (j=0; j<ncols_d; j++) {
3970         cj[k]   = cstart + *aj++;
3971         ca[k++] = *aa++;
3972       }
3973       /* off-diagonal portion of A (global columns >= cend) */
3974       for (j=jo; j<ncols_o; j++) {
3975         cj[k]   = cmap[*bj++];
3976         ca[k++] = *ba++;
3977       }
3978     }
3979     /* put together the new matrix */
3980     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3981     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3982     /* Since these are PETSc arrays, change flags to free them as necessary. */
3983     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3984     mat->free_a  = PETSC_TRUE;
3985     mat->free_ij = PETSC_TRUE;
3986     mat->nonew   = 0;
3987   } else if (scall == MAT_REUSE_MATRIX){
3988     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3989     ci = mat->i; cj = mat->j; ca = mat->a;
3990     for (i=0; i<am; i++) {
3991       /* off-diagonal portion of A (global columns < cstart) */
3992       ncols_o = bi[i+1] - bi[i];
3993       for (jo=0; jo<ncols_o; jo++) {
3994         col = cmap[*bj];
3995         if (col >= cstart) break;
3996         *ca++ = *ba++; bj++;
3997       }
3998       /* diagonal portion of A */
3999       ncols_d = ai[i+1] - ai[i];
4000       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4001       /* off-diagonal portion of A (global columns >= cend) */
4002       for (j=jo; j<ncols_o; j++) {
4003         *ca++ = *ba++; bj++;
4004       }
4005     }
4006   } else {
4007     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4008   }
4009 
4010   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4011   PetscFunctionReturn(0);
4012 }
4013 
4014 static PetscEvent logkey_getlocalmatcondensed = 0;
4015 #undef __FUNCT__
4016 #define __FUNCT__ "MatGetLocalMatCondensed"
4017 /*@C
4018      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A and its NON-ZERO columns
4019 
4020     Not Collective
4021 
4022    Input Parameters:
4023 +    A - the matrix
4024 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4025 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4026 
4027    Output Parameter:
4028 .    A_loc - the local sequential matrix generated
4029 
4030     Level: developer
4031 
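   A usage sketch (passing PETSC_NULL for row and col selects all local rows and
   the nonzero columns):
.vb
      MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
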
4032 @*/
4033 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4034 {
4035   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4036   PetscErrorCode    ierr;
4037   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4038   IS                isrowa,iscola;
4039   Mat               *aloc;
4040 
4041   PetscFunctionBegin;
4042   if (!logkey_getlocalmatcondensed) {
4043     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
4044   }
4045   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4046   if (!row){
4047     start = A->rmap.rstart; end = A->rmap.rend;
4048     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4049   } else {
4050     isrowa = *row;
4051   }
4052   if (!col){
4053     start = A->cmap.rstart;
4054     cmap  = a->garray;
4055     nzA   = a->A->cmap.n;
4056     nzB   = a->B->cmap.n;
4057     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4058     ncols = 0;
4059     for (i=0; i<nzB; i++) {
4060       if (cmap[i] < start) idx[ncols++] = cmap[i];
4061       else break;
4062     }
4063     imark = i;
4064     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4065     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4066     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4067     ierr = PetscFree(idx);CHKERRQ(ierr);
4068   } else {
4069     iscola = *col;
4070   }
4071   if (scall != MAT_INITIAL_MATRIX){
4072     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4073     aloc[0] = *A_loc;
4074   }
4075   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4076   *A_loc = aloc[0];
4077   ierr = PetscFree(aloc);CHKERRQ(ierr);
4078   if (!row){
4079     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4080   }
4081   if (!col){
4082     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4083   }
4084   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4085   PetscFunctionReturn(0);
4086 }
4087 
4088 static PetscEvent logkey_GetBrowsOfAcols = 0;
4089 #undef __FUNCT__
4090 #define __FUNCT__ "MatGetBrowsOfAcols"
4091 /*@C
4092     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4093 
4094     Collective on Mat
4095 
4096    Input Parameters:
4097 +    A,B - the matrices in mpiaij format
4098 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4099 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4100 
4101    Output Parameters:
4102 +    rowb, colb - index sets of the rows and columns of B that were extracted
4103 .    brstart - row index in B_seq at which B's own local rows begin (the next B->rmap.n rows of B_seq come from B's local rows)
4104 -    B_seq - the sequential matrix generated
4105 
4106     Level: developer
4107 
4108 @*/
4109 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4110 {
4111   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4112   PetscErrorCode    ierr;
4113   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4114   IS                isrowb,iscolb;
4115   Mat               *bseq;
4116 
4117   PetscFunctionBegin;
4118   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4119     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4120   }
4121   if (!logkey_GetBrowsOfAcols) {
4122     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
4123   }
4124   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4125 
4126   if (scall == MAT_INITIAL_MATRIX){
4127     start = A->cmap.rstart;
4128     cmap  = a->garray;
4129     nzA   = a->A->cmap.n;
4130     nzB   = a->B->cmap.n;
4131     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4132     ncols = 0;
4133     for (i=0; i<nzB; i++) {  /* global rows below the local row range */
4134       if (cmap[i] < start) idx[ncols++] = cmap[i];
4135       else break;
4136     }
4137     imark = i;
4138     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4139     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows above the local row range */
4140     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4141     ierr = PetscFree(idx);CHKERRQ(ierr);
4142     *brstart = imark;
4143     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4144   } else {
4145     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4146     isrowb = *rowb; iscolb = *colb;
4147     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4148     bseq[0] = *B_seq;
4149   }
4150   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4151   *B_seq = bseq[0];
4152   ierr = PetscFree(bseq);CHKERRQ(ierr);
4153   if (!rowb){
4154     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4155   } else {
4156     *rowb = isrowb;
4157   }
4158   if (!colb){
4159     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4160   } else {
4161     *colb = iscolb;
4162   }
4163   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4164   PetscFunctionReturn(0);
4165 }
4166 
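/*
   Editor's note: a hedged usage sketch (guarded from compilation) for
   MatGetBrowsOfAcols().  The index sets returned through rowb/colb on the
   MAT_INITIAL_MATRIX call must be passed back, along with B_seq, on the
   MAT_REUSE_MATRIX call; brstart is only computed on the initial call.
*/
#if 0
static PetscErrorCode ExampleGetBrowsOfAcols(Mat A,Mat B)
{
  PetscErrorCode ierr;
  IS             rowb,colb;
  PetscInt       brstart;
  Mat            B_seq;

  PetscFunctionBegin;
  ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
  /* ... after B's values change (same nonzero pattern) ... */
  ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
  ierr = ISDestroy(rowb);CHKERRQ(ierr);
  ierr = ISDestroy(colb);CHKERRQ(ierr);
  ierr = MatDestroy(B_seq);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif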
4167 static PetscEvent logkey_GetBrowsOfAocols = 0;
4168 #undef __FUNCT__
4169 #define __FUNCT__ "MatGetBrowsOfAoCols"
4170 /*@C
4171     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
4172     columns of the OFF-DIAGONAL portion of local A
4173 
4174     Collective on Mat
4175 
4176    Input Parameters:
4177 +    A,B - the matrices in mpiaij format
4178 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4179 .    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4180 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4181 
4182    Output Parameter:
4183 .    B_oth - the sequential matrix generated
4184 
4185     Level: developer
4186 
4187 @*/
4188 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4189 {
4190   VecScatter_MPI_General *gen_to,*gen_from;
4191   PetscErrorCode         ierr;
4192   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4193   Mat_SeqAIJ             *b_oth;
4194   VecScatter             ctx=a->Mvctx;
4195   MPI_Comm               comm=ctx->comm;
4196   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4197   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4198   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4199   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4200   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4201   MPI_Status             *sstatus,rstatus;
4202   PetscMPIInt            jj;
4203   PetscInt               *cols,sbs,rbs;
4204   PetscScalar            *vals;
4205 
4206   PetscFunctionBegin;
4207   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4208     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4209   }
4210   if (!logkey_GetBrowsOfAocols) {
4211     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
4212   }
4213   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4214   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4215 
4216   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4217   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4218   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
4219   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
4220   nrecvs   = gen_from->n;
4221   nsends   = gen_to->n;
4222 
4223   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4224   srow     = gen_to->indices;   /* local row index to be sent */
4225   sstarts  = gen_to->starts;
4226   sprocs   = gen_to->procs;
4227   sstatus  = gen_to->sstatus;
4228   sbs      = gen_to->bs;
4229   rstarts  = gen_from->starts;
4230   rprocs   = gen_from->procs;
4231   rbs      = gen_from->bs;
4232 
4233   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4234   if (scall == MAT_INITIAL_MATRIX){
4235     /* i-array */
4236     /*---------*/
4237     /*  post receives */
4238     for (i=0; i<nrecvs; i++){
4239       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4240       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4241       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4242     }
4243 
4244     /* pack the outgoing message */
4245     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4246     rstartsj = sstartsj + nsends +1;
4247     sstartsj[0] = 0;  rstartsj[0] = 0;
4248     len = 0; /* total length of j or a array to be sent */
4249     k = 0;
4250     for (i=0; i<nsends; i++){
4251       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4252       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4253       for (j=0; j<nrows; j++) {
4254         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4255         for (l=0; l<sbs; l++){
4256           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4257           rowlen[j*sbs+l] = ncols;
4258           len += ncols;
4259           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4260         }
4261         k++;
4262       }
4263       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4264       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4265     }
4266     /* wait until recvs and sends of the i-array are completed */
4267     i = nrecvs;
4268     while (i--) {
4269       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4270     }
4271     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4272 
4273     /* allocate buffers for sending j and a arrays */
4274     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4275     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4276 
4277     /* create i-array of B_oth */
4278     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4279     b_othi[0] = 0;
4280     len = 0; /* total length of j or a array to be received */
4281     k = 0;
4282     for (i=0; i<nrecvs; i++){
4283       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4284       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4285       for (j=0; j<nrows; j++) {
4286         b_othi[k+1] = b_othi[k] + rowlen[j];
4287         len += rowlen[j]; k++;
4288       }
4289       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4290     }
4291 
4292     /* allocate space for j and a arrays of B_oth */
4293     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4294     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4295 
4296     /* j-array */
4297     /*---------*/
4298     /*  post receives of j-array */
4299     for (i=0; i<nrecvs; i++){
4300       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4301       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4302     }
4303 
4304     /* pack the outgoing message j-array */
4305     k = 0;
4306     for (i=0; i<nsends; i++){
4307       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4308       bufJ = bufj+sstartsj[i];
4309       for (j=0; j<nrows; j++) {
4310         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4311         for (ll=0; ll<sbs; ll++){
4312           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4313           for (l=0; l<ncols; l++){
4314             *bufJ++ = cols[l];
4315           }
4316           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4317         }
4318       }
4319       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4320     }
4321 
4322     /* wait until recvs and sends of the j-array are completed */
4323     i = nrecvs;
4324     while (i--) {
4325       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4326     }
4327     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4328   } else if (scall == MAT_REUSE_MATRIX){
4329     sstartsj = *startsj;
4330     rstartsj = sstartsj + nsends +1;
4331     bufa     = *bufa_ptr;
4332     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4333     b_otha   = b_oth->a;
4334   } else {
4335     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4336   }
4337 
4338   /* a-array */
4339   /*---------*/
4340   /*  post receives of a-array */
4341   for (i=0; i<nrecvs; i++){
4342     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4343     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4344   }
4345 
4346   /* pack the outgoing message a-array */
4347   k = 0;
4348   for (i=0; i<nsends; i++){
4349     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4350     bufA = bufa+sstartsj[i];
4351     for (j=0; j<nrows; j++) {
4352       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4353       for (ll=0; ll<sbs; ll++){
4354         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4355         for (l=0; l<ncols; l++){
4356           *bufA++ = vals[l];
4357         }
4358         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4359       }
4360     }
4361     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4362   }
4363   /* wait until recvs and sends of the a-array are completed */
4364   i = nrecvs;
4365   while (i--) {
4366     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4367   }
4368   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4369   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4370 
4371   if (scall == MAT_INITIAL_MATRIX){
4372     /* put together the new matrix */
4373     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4374 
4375     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4376     /* Since these are PETSc arrays, change flags to free them as necessary. */
4377     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4378     b_oth->free_a  = PETSC_TRUE;
4379     b_oth->free_ij = PETSC_TRUE;
4380     b_oth->nonew   = 0;
4381 
4382     ierr = PetscFree(bufj);CHKERRQ(ierr);
4383     if (!startsj || !bufa_ptr){
4384       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4385       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer allocated above */
4386     } else {
4387       *startsj  = sstartsj;
4388       *bufa_ptr = bufa;
4389     }
4390   }
4391   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4392   PetscFunctionReturn(0);
4393 }
4394 
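/*
   Editor's note: a hedged usage sketch (guarded from compilation) for
   MatGetBrowsOfAoCols().  When startsj and bufa_ptr are supplied, the message
   layout and send buffer are saved so that a MAT_REUSE_MATRIX call only needs
   to recommunicate the numerical values; the caller frees them at the end.
*/
#if 0
static PetscErrorCode ExampleGetBrowsOfAoCols(Mat A,Mat B)
{
  PetscErrorCode ierr;
  PetscInt       *startsj = PETSC_NULL;
  PetscScalar    *bufa    = PETSC_NULL;
  Mat            B_oth;

  PetscFunctionBegin;
  ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
  /* ... after B's values change (same nonzero pattern) ... */
  ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
  ierr = PetscFree(startsj);CHKERRQ(ierr);
  ierr = PetscFree(bufa);CHKERRQ(ierr);
  ierr = MatDestroy(B_oth);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif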
4395 #undef __FUNCT__
4396 #define __FUNCT__ "MatGetCommunicationStructs"
4397 /*@C
4398   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4399 
4400   Not Collective
4401 
4402   Input Parameter:
4403 . A - The matrix in mpiaij format
4404 
4405   Output Parameters:
4406 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4407 . colmap - A map from global column index to local index into lvec
4408 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4409 
4410   Level: developer
4411 
4412 @*/
4413 #if defined (PETSC_USE_CTABLE)
4414 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4415 #else
4416 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4417 #endif
4418 {
4419   Mat_MPIAIJ *a;
4420 
4421   PetscFunctionBegin;
4422   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4423   PetscValidPointer(lvec, 2);
4424   PetscValidPointer(colmap, 3);
4425   PetscValidPointer(multScatter, 4);
4426   a = (Mat_MPIAIJ *) A->data;
4427   if (lvec) *lvec = a->lvec;
4428   if (colmap) *colmap = a->colmap;
4429   if (multScatter) *multScatter = a->Mvctx;
4430   PetscFunctionReturn(0);
4431 }
4432 
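/*
   Editor's note: a minimal sketch (guarded from compilation) of calling
   MatGetCommunicationStructs().  The returned objects are borrowed references
   owned by the matrix, so the caller must not destroy them.
*/
#if 0
static PetscErrorCode ExampleGetCommunicationStructs(Mat A)
{
  PetscErrorCode ierr;
  Vec            lvec;
  VecScatter     sctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable     colmap;
#else
  PetscInt       *colmap;
#endif

  PetscFunctionBegin;
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sctx);CHKERRQ(ierr);
  /* inspect lvec, colmap, and sctx here; do not destroy them */
  PetscFunctionReturn(0);
}
#endif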
4433 EXTERN_C_BEGIN
4434 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4435 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4436 EXTERN_C_END
4437 
4438 /*MC
4439    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4440 
4441    Options Database Keys:
4442 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4443 
4444   Level: beginner
4445 
4446 .seealso: MatCreateMPIAIJ
4447 M*/
4448 
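/*
   Editor's note: a hedged sketch (guarded from compilation) of the usual way to
   obtain an MPIAIJ matrix programmatically; the preallocation counts (5 and 2)
   are illustrative placeholders only.
*/
#if 0
static PetscErrorCode ExampleCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,C);CHKERRQ(ierr);
  ierr = MatSetSizes(*C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*C,MATMPIAIJ);CHKERRQ(ierr); /* or use -mat_type mpiaij with MatSetFromOptions() */
  ierr = MatMPIAIJSetPreallocation(*C,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif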
4449 EXTERN_C_BEGIN
4450 #undef __FUNCT__
4451 #define __FUNCT__ "MatCreate_MPIAIJ"
4452 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4453 {
4454   Mat_MPIAIJ     *b;
4455   PetscErrorCode ierr;
4456   PetscMPIInt    size;
4457 
4458   PetscFunctionBegin;
4459   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4460 
4461   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4462   B->data         = (void*)b;
4463   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4464   B->factor       = 0;
4465   B->rmap.bs      = 1;
4466   B->assembled    = PETSC_FALSE;
4467   B->mapping      = 0;
4468 
4469   B->insertmode      = NOT_SET_VALUES;
4470   b->size            = size;
4471   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4472 
4473   /* build cache for off array entries formed */
4474   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4475   b->donotstash  = PETSC_FALSE;
4476   b->colmap      = 0;
4477   b->garray      = 0;
4478   b->roworiented = PETSC_TRUE;
4479 
4480   /* stuff used for matrix vector multiply */
4481   b->lvec      = PETSC_NULL;
4482   b->Mvctx     = PETSC_NULL;
4483 
4484   /* stuff for MatGetRow() */
4485   b->rowindices   = 0;
4486   b->rowvalues    = 0;
4487   b->getrowactive = PETSC_FALSE;
4488 
4489 
4490   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4491                                      "MatStoreValues_MPIAIJ",
4492                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4494                                      "MatRetrieveValues_MPIAIJ",
4495                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4497 				     "MatGetDiagonalBlock_MPIAIJ",
4498                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4499   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4500 				     "MatIsTranspose_MPIAIJ",
4501 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4503 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4504 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4505   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4506 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4507 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4509 				     "MatDiagonalScaleLocal_MPIAIJ",
4510 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4512                                      "MatConvert_MPIAIJ_MPICSRPERM",
4513                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4515                                      "MatConvert_MPIAIJ_MPICRL",
4516                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4517   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4518   PetscFunctionReturn(0);
4519 }
4520 EXTERN_C_END
4521 
4522 #undef __FUNCT__
4523 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4524 /*@C
4525      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
4526          and "off-diagonal" part of the matrix in CSR format.
4527 
4528    Collective on MPI_Comm
4529 
4530    Input Parameters:
4531 +  comm - MPI communicator
4532 .  m - number of local rows (Cannot be PETSC_DECIDE)
4533 .  n - This value should be the same as the local size used in creating the
4534        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4535        calculated if N is given). For square matrices n is almost always m.
4536 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4537 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4538 .   i - row indices for "diagonal" portion of matrix
4539 .   j - column indices
4540 .   a - matrix values
4541 .   oi - row indices for "off-diagonal" portion of matrix
4542 .   oj - column indices
4543 -   oa - matrix values
4544 
4545    Output Parameter:
4546 .   mat - the matrix
4547 
4548    Level: advanced
4549 
4550    Notes:
4551        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4552 
4553        The i and j indices are 0 based
4554 
4555        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4556 
4557 
4558 .keywords: matrix, aij, compressed row, sparse, parallel
4559 
4560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4561           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4562 @*/
4563 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4564 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4565 {
4566   PetscErrorCode ierr;
4567   Mat_MPIAIJ     *maij;
4568 
4569   PetscFunctionBegin;
4570   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4571   if (i[0]) {
4572     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4573   }
4574   if (oi[0]) {
4575     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4576   }
4577   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4578   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4579   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4580   maij = (Mat_MPIAIJ*) (*mat)->data;
4581   maij->donotstash     = PETSC_TRUE;
4582   (*mat)->preallocated = PETSC_TRUE;
4583 
4584   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4585   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4586   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4587 
4588   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4589   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4590 
4591   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4592   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4593   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4594   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4595 
4596   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4597   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4598   PetscFunctionReturn(0);
4599 }
4600 
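/*
   Editor's note: a toy two-process sketch (guarded from compilation) for
   MatCreateMPIAIJWithSplitArrays().  Each process contributes one row with one
   entry in its diagonal block and one in its off-diagonal block; since the
   arrays are not copied, they are declared static so they outlive the call.
*/
#if 0
static PetscErrorCode ExampleSplitArrays(MPI_Comm comm,PetscMPIInt rank,Mat *C)
{
  PetscErrorCode     ierr;
  static PetscInt    ii[2] = {0,1},jj[1] = {0};  /* diagonal block: one entry in local column 0 */
  static PetscScalar aa[1] = {2.0};
  static PetscInt    oi[2] = {0,1},oj[1];        /* off-diagonal block: one entry, global column index */
  static PetscScalar oa[1] = {-1.0};

  PetscFunctionBegin;
  oj[0] = rank ? 0 : 2;  /* couple to the first column owned by the other process */
  ierr = MatCreateMPIAIJWithSplitArrays(comm,1,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,oi,oj,oa,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif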
4601 /*
4602     Special version for direct calls from Fortran
4603 */
4604 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4605 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4606 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4607 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4608 #endif
4609 
4610 /* Change these macros so they can be used in a void function */
4611 #undef CHKERRQ
4612 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4613 #undef SETERRQ2
4614 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4615 #undef SETERRQ
4616 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4617 
4618 EXTERN_C_BEGIN
4619 #undef __FUNCT__
4620 #define __FUNCT__ "matsetvaluesmpiaij_"
4621 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4622 {
4623   Mat            mat = *mmat;
4624   PetscInt       m = *mm, n = *mn;
4625   InsertMode     addv = *maddv;
4626   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4627   PetscScalar    value;
4628   PetscErrorCode ierr;
4629 
4630   MatPreallocated(mat);
4631   if (mat->insertmode == NOT_SET_VALUES) {
4632     mat->insertmode = addv;
4633   }
4634 #if defined(PETSC_USE_DEBUG)
4635   else if (mat->insertmode != addv) {
4636     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4637   }
4638 #endif
4639   {
4640   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4641   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4642   PetscTruth     roworiented = aij->roworiented;
4643 
4644   /* Some Variables required in the macro */
4645   Mat            A = aij->A;
4646   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4647   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4648   PetscScalar    *aa = a->a;
4649   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4650   Mat            B = aij->B;
4651   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4652   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4653   PetscScalar    *ba = b->a;
4654 
4655   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4656   PetscInt       nonew = a->nonew;
4657   PetscScalar    *ap1,*ap2;
4658 
4659   PetscFunctionBegin;
4660   for (i=0; i<m; i++) {
4661     if (im[i] < 0) continue;
4662 #if defined(PETSC_USE_DEBUG)
4663     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4664 #endif
4665     if (im[i] >= rstart && im[i] < rend) {
4666       row      = im[i] - rstart;
4667       lastcol1 = -1;
4668       rp1      = aj + ai[row];
4669       ap1      = aa + ai[row];
4670       rmax1    = aimax[row];
4671       nrow1    = ailen[row];
4672       low1     = 0;
4673       high1    = nrow1;
4674       lastcol2 = -1;
4675       rp2      = bj + bi[row];
4676       ap2      = ba + bi[row];
4677       rmax2    = bimax[row];
4678       nrow2    = bilen[row];
4679       low2     = 0;
4680       high2    = nrow2;
4681 
4682       for (j=0; j<n; j++) {
4683         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4684         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4685         if (in[j] >= cstart && in[j] < cend){
4686           col = in[j] - cstart;
4687           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4688         } else if (in[j] < 0) continue;
4689 #if defined(PETSC_USE_DEBUG)
4690         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4691 #endif
4692         else {
4693           if (mat->was_assembled) {
4694             if (!aij->colmap) {
4695               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4696             }
4697 #if defined (PETSC_USE_CTABLE)
4698             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4699 	    col--;
4700 #else
4701             col = aij->colmap[in[j]] - 1;
4702 #endif
4703             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4704               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4705               col =  in[j];
4706               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4707               B = aij->B;
4708               b = (Mat_SeqAIJ*)B->data;
4709               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4710               rp2      = bj + bi[row];
4711               ap2      = ba + bi[row];
4712               rmax2    = bimax[row];
4713               nrow2    = bilen[row];
4714               low2     = 0;
4715               high2    = nrow2;
4716               bm       = aij->B->rmap.n;
4717               ba = b->a;
4718             }
4719           } else col = in[j];
4720           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4721         }
4722       }
4723     } else {
4724       if (!aij->donotstash) {
4725         if (roworiented) {
4726           if (ignorezeroentries && v[i*n] == 0.0) continue;
4727           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4728         } else {
4729           if (ignorezeroentries && v[i] == 0.0) continue;
4730           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4731         }
4732       }
4733     }
4734   }}
4735   PetscFunctionReturnVoid();
4736 }
4737 EXTERN_C_END
4738 
4739