/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4e0d8c253c5baa1c48327ddb2b78bf7a43f58497) */
#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local column number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table lookup cost; without it, it is not scalable
  (each process stores an order-N integer array) but lookups are fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
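
/*
   Usage sketch (illustrative only; gcol and lcol are hypothetical variables):
   translating a global column number gcol into its local position in the
   off-diagonal block B. Both storage schemes record "local index + 1" so that
   0 (an empty table slot or a zeroed array entry) can mean "not present":

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) ... gcol has no entry in B on this process ...
*/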


#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
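
/*
  Both macros above implement the same row-insertion scheme (sketch, with the
  1/2 suffixes dropped): a binary search first narrows [low,high) down to at
  most five candidate slots,

     while (high-low > 5) { t = (low+high)/2; if (rp[t] > col) high = t; else low = t; }

  then a linear scan either updates an existing entry (ADD_VALUES adds,
  INSERT_VALUES overwrites) and jumps to the noinsert label, or stops at the
  insertion point _i. A genuinely new nonzero may trigger a reallocation via
  MatSeqXAIJReallocateAIJ(), after which rp[_i..] and ap[_i..] are shifted one
  slot right to make room for the new column and value.
*/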

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
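
/*
  Layout assumed by MatSetValuesRow_MPIAIJ() above (sketch): within one local
  row the caller's values v[] must already be ordered by global column,

     [ B entries left of the diagonal block | all A (diagonal block) entries | remaining B entries ]

  which is why the three PetscMemcpy() calls split v at l and at
  l + (a->i[row+1] - a->i[row]).
*/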

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 ba must be refreshed from the new b before ap2 is recomputed from it */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
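
/*
  Usage sketch (illustrative only; row, col, and val are hypothetical): values
  are set with GLOBAL indices. Rows owned by this process are routed directly
  into A or B above; anything else lands in the stash and is communicated to
  its owner during assembly:

     PetscInt    row = 0, col = 0;     (any global indices)
     PetscScalar val = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/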

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
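
/*
  Note on the Allreduce above (sketch): INSERT_VALUES and ADD_VALUES are
  distinct nonzero bit patterns, so the bitwise OR across all processes equals
  (ADD_VALUES|INSERT_VALUES) exactly when two processes used different modes,
  which is what the error test detects.
*/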

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and slow things down with many mallocs.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
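
/*
  Communication pattern of MatZeroRows_MPIAIJ() above (sketch): each process
  bins its requested global rows by owner (the owners[] walk), learns how many
  messages to expect via PetscMaxSum(), posts MPI_Irecv()s, ships the row
  indices with MPI_Isend(), and finally zeros the rows it received, shifted to
  local numbering by subtracting base = owners[rank].
*/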

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
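
/*
  The product above is formed in two overlapped pieces (sketch):

     y = A_d * x_local + B_o * x_ghost

  where A_d is the diagonal block, B_o the off-diagonal block, and x_ghost
  (held in lvec) is gathered from other processes while A_d*x_local proceeds,
  hiding the communication behind the local multiply.
*/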

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values were actually received in the Begin(), but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
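
/*
  Transpose analog of the forward product (sketch): the off-diagonal
  contribution is computed locally, then scattered back to its owners,

     y = A_d^T * x  +  (reverse ADD_VALUES scatter of  B_o^T * x)

  which is why the Begin/End pair runs in SCATTER_REVERSE with ADD_VALUES.
*/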

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
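
/*
  Resulting binary layout written above (sketch; a single stream, rank 0 does
  all the writing while other ranks forward their pieces):

     header[4]        MAT_FILE_COOKIE, M, N, total nonzeros
     row lengths      M PetscInt values, process by process
     column indices   nz PetscInt values, global numbering, row by row
     values           nz PetscScalar values in the same order
*/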

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
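
/*
  Each outer iteration above performs (sketch):

     bb1 = bb - B_o * x_ghost      off-process coupling folded into the rhs
     x   = local SOR sweep on A_d with rhs bb1

  implemented by scaling lvec by -1 and reusing B's multadd, so only the local
  diagonal block is ever relaxed; a truly parallel SOR is not supported.
*/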

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
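
/*
  Merge performed above (sketch): A and B each store their rows with ascending
  local columns, so the full global row is produced as

     idx = [ cmap[B cols below cstart] | cstart + A cols | cmap[remaining B cols] ]

  with imark recording how many B entries fall to the left of the diagonal block.
*/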

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatNorm_MPIAIJ"
1339 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1340 {
1341   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1342   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1343   PetscErrorCode ierr;
1344   PetscInt       i,j,cstart = mat->cmap.rstart;
1345   PetscReal      sum = 0.0;
1346   PetscScalar    *v;
1347 
1348   PetscFunctionBegin;
1349   if (aij->size == 1) {
1350     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1351   } else {
1352     if (type == NORM_FROBENIUS) {
1353       v = amat->a;
1354       for (i=0; i<amat->nz; i++) {
1355 #if defined(PETSC_USE_COMPLEX)
1356         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1357 #else
1358         sum += (*v)*(*v); v++;
1359 #endif
1360       }
1361       v = bmat->a;
1362       for (i=0; i<bmat->nz; i++) {
1363 #if defined(PETSC_USE_COMPLEX)
1364         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1365 #else
1366         sum += (*v)*(*v); v++;
1367 #endif
1368       }
1369       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1370       *norm = sqrt(*norm);
1371     } else if (type == NORM_1) { /* max column norm */
1372       PetscReal *tmp,*tmp2;
1373       PetscInt    *jj,*garray = aij->garray;
1374       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1375       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1376       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1377       *norm = 0.0;
1378       v = amat->a; jj = amat->j;
1379       for (j=0; j<amat->nz; j++) {
1380         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1381       }
1382       v = bmat->a; jj = bmat->j;
1383       for (j=0; j<bmat->nz; j++) {
1384         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1385       }
1386       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1387       for (j=0; j<mat->cmap.N; j++) {
1388         if (tmp2[j] > *norm) *norm = tmp2[j];
1389       }
1390       ierr = PetscFree(tmp);CHKERRQ(ierr);
1391       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1392     } else if (type == NORM_INFINITY) { /* max row norm */
1393       PetscReal ntemp = 0.0;
1394       for (j=0; j<aij->A->rmap.n; j++) {
1395         v = amat->a + amat->i[j];
1396         sum = 0.0;
1397         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1398           sum += PetscAbsScalar(*v); v++;
1399         }
1400         v = bmat->a + bmat->i[j];
1401         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1402           sum += PetscAbsScalar(*v); v++;
1403         }
1404         if (sum > ntemp) ntemp = sum;
1405       }
1406       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1407     } else {
1408       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1409     }
1410   }
1411   PetscFunctionReturn(0);
1412 }
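
/*
   A short usage sketch (hypothetical matrix mat) exercising the three norm
   types implemented above; NORM_2 is the unsupported case that errors out:

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);   sqrt of the global sum of squares
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);           maximum column sum
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);    maximum row sum
*/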
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatTranspose_MPIAIJ"
1416 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1417 {
1418   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1419   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1420   PetscErrorCode ierr;
1421   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1422   PetscInt       cstart=A->cmap.rstart,ncol;
1423   Mat            B;
1424   PetscScalar    *array;
1425 
1426   PetscFunctionBegin;
1427   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1428 
1429   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1430   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1431   ai = Aloc->i; aj = Aloc->j;
1432   bi = Bloc->i; bj = Bloc->j;
1433   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1434   cols = d_nnz + na + 1; /* work space to be used by B part */
1435   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1436   for (i=0; i<ai[ma]; i++){
1437     d_nnz[aj[i]]++;
1438     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1439   }
1440 
1441   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1442   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1443   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1444   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1445 
1446   /* copy over the A part */
1447   array = Aloc->a;
1448   row = A->rmap.rstart;
1449   for (i=0; i<ma; i++) {
1450     ncol = ai[i+1]-ai[i];
1451     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1452     row++; array += ncol; aj += ncol;
1453   }
1454   aj = Aloc->j;
1455   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1456 
1457   /* copy over the B part */
1458   array = Bloc->a;
1459   row = A->rmap.rstart;
1460   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1461   for (i=0; i<mb; i++) {
1462     ncol = bi[i+1]-bi[i];
1463     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1464     row++; array += ncol; cols += ncol;
1465   }
1466   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1467   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1468   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1469   if (matout) {
1470     *matout = B;
1471   } else {
1472     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1473   }
1474   PetscFunctionReturn(0);
1475 }
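
/*
   Usage sketch for the routine above through the public MatTranspose()
   (matrix names hypothetical).  A non-null second argument produces a new
   matrix; passing PETSC_NULL requests the in-place transpose, which the
   check above restricts to square matrices:

     Mat At;
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);          out-of-place: At = A^T
     ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);   in-place, square A only
*/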
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1479 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1480 {
1481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1482   Mat            a = aij->A,b = aij->B;
1483   PetscErrorCode ierr;
1484   PetscInt       s1,s2,s3;
1485 
1486   PetscFunctionBegin;
1487   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1488   if (rr) {
1489     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1490     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1491     /* Overlap communication with computation. */
1492     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493   }
1494   if (ll) {
1495     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1496     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1497     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1498   }
1499   /* scale the diagonal block */
1500   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1501 
1502   if (rr) {
1503     /* Do a scatter end and then right scale the off-diagonal block */
1504     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1506   }
1507 
1508   PetscFunctionReturn(0);
1509 }
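
/*
   Usage sketch (vector names hypothetical): MatDiagonalScale() computes
   mat = diag(ll)*mat*diag(rr).  Per the checks above, ll must conform to the
   local row layout and rr to the local column layout of mat:

     Vec l,r;
     ierr = VecCreateMPI(mat->comm,m,M,&l);CHKERRQ(ierr);   m,M = local/global row sizes
     ierr = VecCreateMPI(mat->comm,n,N,&r);CHKERRQ(ierr);   n,N = local/global column sizes
     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);        either vector may be PETSC_NULL
*/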
1510 
1511 #undef __FUNCT__
1512 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1513 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1514 {
1515   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1516   PetscErrorCode ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1520   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1521   PetscFunctionReturn(0);
1522 }
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1525 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1526 {
1527   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1528   PetscErrorCode ierr;
1529 
1530   PetscFunctionBegin;
1531   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatEqual_MPIAIJ"
1537 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1538 {
1539   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1540   Mat            a,b,c,d;
1541   PetscTruth     flg;
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   a = matA->A; b = matA->B;
1546   c = matB->A; d = matB->B;
1547 
1548   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1549   if (flg) {
1550     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1551   }
1552   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatCopy_MPIAIJ"
1558 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1559 {
1560   PetscErrorCode ierr;
1561   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1562   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1563 
1564   PetscFunctionBegin;
1565   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1566   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1567     /* because of the column compression in the off-processor part of the matrix a->B,
1568        the number of columns in a->B and b->B may be different, hence we cannot call
1569        MatCopy() directly on the two parts. If need be, we can provide a more
1570        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1571        then copying the submatrices */
1572     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1573   } else {
1574     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1575     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1582 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1583 {
1584   PetscErrorCode ierr;
1585 
1586   PetscFunctionBegin;
1587   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 #include "petscblaslapack.h"
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatAXPY_MPIAIJ"
1594 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1595 {
1596   PetscErrorCode ierr;
1597   PetscInt       i;
1598   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1599   PetscBLASInt   bnz,one=1;
1600   Mat_SeqAIJ     *x,*y;
1601 
1602   PetscFunctionBegin;
1603   if (str == SAME_NONZERO_PATTERN) {
1604     PetscScalar alpha = a;
1605     x = (Mat_SeqAIJ *)xx->A->data;
1606     y = (Mat_SeqAIJ *)yy->A->data;
1607     bnz = (PetscBLASInt)x->nz;
1608     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1609     x = (Mat_SeqAIJ *)xx->B->data;
1610     y = (Mat_SeqAIJ *)yy->B->data;
1611     bnz = (PetscBLASInt)x->nz;
1612     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1613   } else if (str == SUBSET_NONZERO_PATTERN) {
1614     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1615 
1616     x = (Mat_SeqAIJ *)xx->B->data;
1617     y = (Mat_SeqAIJ *)yy->B->data;
1618     if (y->xtoy && y->XtoY != xx->B) {
1619       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1620       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1621     }
1622     if (!y->xtoy) { /* get xtoy */
1623       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1624       y->XtoY = xx->B;
1625       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1626     }
1627     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1628   } else {
1629     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1630   }
1631   PetscFunctionReturn(0);
1632 }
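
/*
   Usage sketch (Y and X hypothetical matrices with the same parallel
   layout): MatAXPY() computes Y = a*X + Y.  The MatStructure flag selects
   the path above: a raw BLAS axpy over both stored parts for
   SAME_NONZERO_PATTERN, the cached x-to-y index map for
   SUBSET_NONZERO_PATTERN, and MatAXPY_Basic() otherwise:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/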
1633 
1634 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1635 
1636 #undef __FUNCT__
1637 #define __FUNCT__ "MatConjugate_MPIAIJ"
1638 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1639 {
1640 #if defined(PETSC_USE_COMPLEX)
1641   PetscErrorCode ierr;
1642   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1643 
1644   PetscFunctionBegin;
1645   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1646   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1647 #else
1648   PetscFunctionBegin;
1649 #endif
1650   PetscFunctionReturn(0);
1651 }
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatRealPart_MPIAIJ"
1655 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1656 {
1657   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1658   PetscErrorCode ierr;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1662   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1668 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1669 {
1670   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1671   PetscErrorCode ierr;
1672 
1673   PetscFunctionBegin;
1674   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1675   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 #ifdef PETSC_HAVE_PBGL
1680 
1681 #include <boost/parallel/mpi/bsp_process_group.hpp>
1682 #include <boost/graph/distributed/ilu_default_graph.hpp>
1683 #include <boost/graph/distributed/ilu_0_block.hpp>
1684 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1685 #include <boost/graph/distributed/petsc/interface.hpp>
1686 #include <boost/multi_array.hpp>
1687 #include <boost/parallel/distributed_property_map.hpp>
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1691 /*
1692   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1693 */
1694 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1695 {
1696   namespace petsc = boost::distributed::petsc;
1697 
1698   namespace graph_dist = boost::graph::distributed;
1699   using boost::graph::distributed::ilu_default::process_group_type;
1700   using boost::graph::ilu_permuted;
1701 
1702   PetscTruth      row_identity, col_identity;
1703   PetscContainer  c;
1704   PetscInt        m, n, M, N;
1705   PetscErrorCode  ierr;
1706 
1707   PetscFunctionBegin;
1708   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1709   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1710   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1711   if (!row_identity || !col_identity) {
1712     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1713   }
1714 
1715   process_group_type pg;
1716   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1717   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1718   lgraph_type&   level_graph = *lgraph_p;
1719   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1720 
1721   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1722   ilu_permuted(level_graph);
1723 
1724   /* put together the new matrix */
1725   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1726   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1727   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1728   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1729   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1730   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1731   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1732   (*fact)->factor = FACTOR_LU;
1733 
1734   ierr = PetscContainerCreate(A->comm, &c);CHKERRQ(ierr);
1735   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1736   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1742 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1743 {
1744   PetscFunctionBegin;
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatSolve_MPIAIJ"
1750 /*
1751   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1752 */
1753 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1754 {
1755   namespace graph_dist = boost::graph::distributed;
1756 
1757   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1758   lgraph_type*   lgraph_p;
1759   PetscContainer c;
1760   PetscErrorCode ierr;
1761 
1762   PetscFunctionBegin;
1763   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1764   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1765   ierr = VecCopy(b, x);CHKERRQ(ierr);
1766 
1767   PetscScalar* array_x;
1768   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1769   PetscInt sx;
1770   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1771 
1772   PetscScalar* array_b;
1773   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1774   PetscInt sb;
1775   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1776 
1777   lgraph_type&   level_graph = *lgraph_p;
1778   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1779 
1780   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1781   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1782                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1783 
1784   typedef boost::iterator_property_map<array_ref_type::iterator,
1785                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1786   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1787                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1788 
1789   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1790 
1791   PetscFunctionReturn(0);
1792 }
1793 #endif
1794 
1795 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1796   PetscInt       nzlocal,nsends,nrecvs;
1797   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
1798   PetscScalar    *sbuf_a,**rbuf_a;
1799   PetscErrorCode (*MatDestroy)(Mat);
1800 } Mat_Redundant;
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1804 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1805 {
1806   PetscErrorCode       ierr;
1807   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1808   PetscInt             i;
1809 
1810   PetscFunctionBegin;
1811   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1812   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1813   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1814   for (i=0; i<redund->nrecvs; i++){
1815     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1816     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1817   }
1818   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1819   ierr = PetscFree(redund);CHKERRQ(ierr);
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatDestroy_MatRedundant"
1825 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1826 {
1827   PetscErrorCode  ierr;
1828   PetscContainer  container;
1829   Mat_Redundant   *redund=PETSC_NULL;
1830 
1831   PetscFunctionBegin;
1832   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1833   if (container) {
1834     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1835   } else {
1836     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1837   }
1838   A->ops->destroy = redund->MatDestroy;
1839   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1840   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1841   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1847 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1848 {
1849   PetscMPIInt    rank,size;
1850   MPI_Comm       comm=mat->comm;
1851   PetscErrorCode ierr;
1852   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1853   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1854   PetscInt       *rowrange=mat->rmap.range;
1855   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1856   Mat            A=aij->A,B=aij->B,C=*matredundant;
1857   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1858   PetscScalar    *sbuf_a;
1859   PetscInt       nzlocal=a->nz+b->nz;
1860   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1861   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1862   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1863   PetscScalar    *vals,*aworkA,*aworkB;
1864   PetscMPIInt    tag1,tag2,tag3,imdex;
1865   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1866                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1867   MPI_Status     recv_status,*send_status;
1868   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1869   PetscInt       **rbuf_j=PETSC_NULL;
1870   PetscScalar    **rbuf_a=PETSC_NULL;
1871   Mat_Redundant  *redund=PETSC_NULL;
1872   PetscContainer container;
1873 
1874   PetscFunctionBegin;
1875   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1876   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1877 
1878   if (reuse == MAT_REUSE_MATRIX) {
1879     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1880     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1881     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1882     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1883     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1884     if (container) {
1885       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1886     } else {
1887       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1888     }
1889     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1890 
1891     nsends    = redund->nsends;
1892     nrecvs    = redund->nrecvs;
1893     send_rank = redund->send_rank; recv_rank = send_rank + size;
1894     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1895     sbuf_j    = redund->sbuf_j;
1896     sbuf_a    = redund->sbuf_a;
1897     rbuf_j    = redund->rbuf_j;
1898     rbuf_a    = redund->rbuf_a;
1899   }
1900 
1901   if (reuse == MAT_INITIAL_MATRIX){
1902     PetscMPIInt  subrank,subsize;
1903     PetscInt     nleftover,np_subcomm;
1904     /* get the destination processors' id send_rank, nsends and nrecvs */
1905     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1906     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1907     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
1908     recv_rank = send_rank + size;
1909     np_subcomm = size/nsubcomm;
1910     nleftover  = size - nsubcomm*np_subcomm;
1911     nsends = 0; nrecvs = 0;
1912     for (i=0; i<size; i++){ /* i=rank*/
1913       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1914         send_rank[nsends] = i; nsends++;
1915         recv_rank[nrecvs++] = i;
1916       }
1917     }
1918     if (rank >= size - nleftover){/* this proc is a leftover processor */
1919       i = size-nleftover-1;
1920       j = 0;
1921       while (j < nsubcomm - nleftover){
1922         send_rank[nsends++] = i;
1923         i--; j++;
1924       }
1925     }
1926 
1927     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1928       for (i=0; i<nleftover; i++){
1929         recv_rank[nrecvs++] = size-nleftover+i;
1930       }
1931     }
1932 
1933     /* allocate sbuf_j, sbuf_a */
1934     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1935     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1936     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1937   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1938 
1939   /* copy mat's local entries into the buffers */
1940   if (reuse == MAT_INITIAL_MATRIX){
1941     rownz_max = 0;
1942     rptr = sbuf_j;
1943     cols = sbuf_j + rend-rstart + 1;
1944     vals = sbuf_a;
1945     rptr[0] = 0;
1946     for (i=0; i<rend-rstart; i++){
1947       row = i + rstart;
1948       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1949       ncols  = nzA + nzB;
1950       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1951       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1952       /* load the column indices for this row into cols */
1953       lwrite = 0;
1954       for (l=0; l<nzB; l++) {
1955         if ((ctmp = bmap[cworkB[l]]) < cstart){
1956           vals[lwrite]   = aworkB[l];
1957           cols[lwrite++] = ctmp;
1958         }
1959       }
1960       for (l=0; l<nzA; l++){
1961         vals[lwrite]   = aworkA[l];
1962         cols[lwrite++] = cstart + cworkA[l];
1963       }
1964       for (l=0; l<nzB; l++) {
1965         if ((ctmp = bmap[cworkB[l]]) >= cend){
1966           vals[lwrite]   = aworkB[l];
1967           cols[lwrite++] = ctmp;
1968         }
1969       }
1970       vals += ncols;
1971       cols += ncols;
1972       rptr[i+1] = rptr[i] + ncols;
1973       if (rownz_max < ncols) rownz_max = ncols;
1974     }
1975     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1976   } else { /* only copy matrix values into sbuf_a */
1977     rptr = sbuf_j;
1978     vals = sbuf_a;
1979     rptr[0] = 0;
1980     for (i=0; i<rend-rstart; i++){
1981       row = i + rstart;
1982       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1983       ncols  = nzA + nzB;
1984       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1985       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1986       lwrite = 0;
1987       for (l=0; l<nzB; l++) {
1988         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1989       }
1990       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1991       for (l=0; l<nzB; l++) {
1992         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1993       }
1994       vals += ncols;
1995       rptr[i+1] = rptr[i] + ncols;
1996     }
1997   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1998 
1999   /* send nzlocal to others, and recv other's nzlocal */
2000   /*--------------------------------------------------*/
2001   if (reuse == MAT_INITIAL_MATRIX){
2002     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2003     s_waits2 = s_waits3 + nsends;
2004     s_waits1 = s_waits2 + nsends;
2005     r_waits1 = s_waits1 + nsends;
2006     r_waits2 = r_waits1 + nrecvs;
2007     r_waits3 = r_waits2 + nrecvs;
2008   } else {
2009     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2010     r_waits3 = s_waits3 + nsends;
2011   }
2012 
2013   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2014   if (reuse == MAT_INITIAL_MATRIX){
2015     /* get new tags to keep the communication clean */
2016     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2017     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2018     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2019     rbuf_nz = sbuf_nz + nsends;
2020 
2021     /* post receives of other's nzlocal */
2022     for (i=0; i<nrecvs; i++){
2023       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2024     }
2025     /* send nzlocal to others */
2026     for (i=0; i<nsends; i++){
2027       sbuf_nz[i] = nzlocal;
2028       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2029     }
2030     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2031     count = nrecvs;
2032     while (count) {
2033       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2034       recv_rank[imdex] = recv_status.MPI_SOURCE;
2035       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2036       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2037 
2038       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2039       rbuf_nz[imdex] += i + 2;
2040       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2041       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2042       count--;
2043     }
2044     /* wait on sends of nzlocal */
2045     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2046     /* send mat->i,j to others, and recv from others */
2047     /*------------------------------------------------*/
2048     for (i=0; i<nsends; i++){
2049       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2050       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2051     }
2052     /* wait on receives of mat->i,j */
2053     /*------------------------------*/
2054     count = nrecvs;
2055     while (count) {
2056       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2057       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2058       count--;
2059     }
2060     /* wait on sends of mat->i,j */
2061     /*---------------------------*/
2062     if (nsends) {
2063       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2064     }
2065   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2066 
2067   /* post receives, send and receive mat->a */
2068   /*----------------------------------------*/
2069   for (imdex=0; imdex<nrecvs; imdex++) {
2070     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2071   }
2072   for (i=0; i<nsends; i++){
2073     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2074   }
2075   count = nrecvs;
2076   while (count) {
2077     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2078     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2079     count--;
2080   }
2081   if (nsends) {
2082     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2083   }
2084 
2085   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2086 
2087   /* create redundant matrix */
2088   /*-------------------------*/
2089   if (reuse == MAT_INITIAL_MATRIX){
2090     /* compute rownz_max for preallocation */
2091     for (imdex=0; imdex<nrecvs; imdex++){
2092       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2093       rptr = rbuf_j[imdex];
2094       for (i=0; i<j; i++){
2095         ncols = rptr[i+1] - rptr[i];
2096         if (rownz_max < ncols) rownz_max = ncols;
2097       }
2098     }
2099 
2100     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2101     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2102     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2103     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2104     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2105   } else {
2106     C = *matredundant;
2107   }
2108 
2109   /* insert local matrix entries */
2110   rptr = sbuf_j;
2111   cols = sbuf_j + rend-rstart + 1;
2112   vals = sbuf_a;
2113   for (i=0; i<rend-rstart; i++){
2114     row   = i + rstart;
2115     ncols = rptr[i+1] - rptr[i];
2116     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2117     vals += ncols;
2118     cols += ncols;
2119   }
2120   /* insert received matrix entries */
2121   for (imdex=0; imdex<nrecvs; imdex++){
2122     rstart = rowrange[recv_rank[imdex]];
2123     rend   = rowrange[recv_rank[imdex]+1];
2124     rptr = rbuf_j[imdex];
2125     cols = rbuf_j[imdex] + rend-rstart + 1;
2126     vals = rbuf_a[imdex];
2127     for (i=0; i<rend-rstart; i++){
2128       row   = i + rstart;
2129       ncols = rptr[i+1] - rptr[i];
2130       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2131       vals += ncols;
2132       cols += ncols;
2133     }
2134   }
2135   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2137   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2138   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2139   if (reuse == MAT_INITIAL_MATRIX){
2140     PetscContainer container;
2141     *matredundant = C;
2142     /* create a supporting struct and attach it to C for reuse */
2143     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2144     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2145     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2146     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2147     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2148 
2149     redund->nzlocal = nzlocal;
2150     redund->nsends  = nsends;
2151     redund->nrecvs  = nrecvs;
2152     redund->send_rank = send_rank;
2153     redund->sbuf_nz = sbuf_nz;
2154     redund->sbuf_j  = sbuf_j;
2155     redund->sbuf_a  = sbuf_a;
2156     redund->rbuf_j  = rbuf_j;
2157     redund->rbuf_a  = rbuf_a;
2158 
2159     redund->MatDestroy = C->ops->destroy;
2160     C->ops->destroy    = MatDestroy_MatRedundant;
2161   }
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 /* -------------------------------------------------------------------*/
2166 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2167        MatGetRow_MPIAIJ,
2168        MatRestoreRow_MPIAIJ,
2169        MatMult_MPIAIJ,
2170 /* 4*/ MatMultAdd_MPIAIJ,
2171        MatMultTranspose_MPIAIJ,
2172        MatMultTransposeAdd_MPIAIJ,
2173 #ifdef PETSC_HAVE_PBGL
2174        MatSolve_MPIAIJ,
2175 #else
2176        0,
2177 #endif
2178        0,
2179        0,
2180 /*10*/ 0,
2181        0,
2182        0,
2183        MatRelax_MPIAIJ,
2184        MatTranspose_MPIAIJ,
2185 /*15*/ MatGetInfo_MPIAIJ,
2186        MatEqual_MPIAIJ,
2187        MatGetDiagonal_MPIAIJ,
2188        MatDiagonalScale_MPIAIJ,
2189        MatNorm_MPIAIJ,
2190 /*20*/ MatAssemblyBegin_MPIAIJ,
2191        MatAssemblyEnd_MPIAIJ,
2192        0,
2193        MatSetOption_MPIAIJ,
2194        MatZeroEntries_MPIAIJ,
2195 /*25*/ MatZeroRows_MPIAIJ,
2196        0,
2197 #ifdef PETSC_HAVE_PBGL
2198        MatLUFactorNumeric_MPIAIJ,
2199 #else
2200        0,
2201 #endif
2202        0,
2203        0,
2204 /*30*/ MatSetUpPreallocation_MPIAIJ,
2205 #ifdef PETSC_HAVE_PBGL
2206        MatILUFactorSymbolic_MPIAIJ,
2207 #else
2208        0,
2209 #endif
2210        0,
2211        0,
2212        0,
2213 /*35*/ MatDuplicate_MPIAIJ,
2214        0,
2215        0,
2216        0,
2217        0,
2218 /*40*/ MatAXPY_MPIAIJ,
2219        MatGetSubMatrices_MPIAIJ,
2220        MatIncreaseOverlap_MPIAIJ,
2221        MatGetValues_MPIAIJ,
2222        MatCopy_MPIAIJ,
2223 /*45*/ 0,
2224        MatScale_MPIAIJ,
2225        0,
2226        0,
2227        0,
2228 /*50*/ MatSetBlockSize_MPIAIJ,
2229        0,
2230        0,
2231        0,
2232        0,
2233 /*55*/ MatFDColoringCreate_MPIAIJ,
2234        0,
2235        MatSetUnfactored_MPIAIJ,
2236        MatPermute_MPIAIJ,
2237        0,
2238 /*60*/ MatGetSubMatrix_MPIAIJ,
2239        MatDestroy_MPIAIJ,
2240        MatView_MPIAIJ,
2241        0,
2242        0,
2243 /*65*/ 0,
2244        0,
2245        0,
2246        0,
2247        0,
2248 /*70*/ 0,
2249        0,
2250        MatSetColoring_MPIAIJ,
2251 #if defined(PETSC_HAVE_ADIC)
2252        MatSetValuesAdic_MPIAIJ,
2253 #else
2254        0,
2255 #endif
2256        MatSetValuesAdifor_MPIAIJ,
2257 /*75*/ 0,
2258        0,
2259        0,
2260        0,
2261        0,
2262 /*80*/ 0,
2263        0,
2264        0,
2265        0,
2266 /*84*/ MatLoad_MPIAIJ,
2267        0,
2268        0,
2269        0,
2270        0,
2271        0,
2272 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2273        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2274        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2275        MatPtAP_Basic,
2276        MatPtAPSymbolic_MPIAIJ,
2277 /*95*/ MatPtAPNumeric_MPIAIJ,
2278        0,
2279        0,
2280        0,
2281        0,
2282 /*100*/0,
2283        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2284        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2285        MatConjugate_MPIAIJ,
2286        0,
2287 /*105*/MatSetValuesRow_MPIAIJ,
2288        MatRealPart_MPIAIJ,
2289        MatImaginaryPart_MPIAIJ,
2290        0,
2291        0,
2292 /*110*/0,
2293        MatGetRedundantMatrix_MPIAIJ};
2294 
2295 /* ----------------------------------------------------------------------------------------*/
2296 
2297 EXTERN_C_BEGIN
2298 #undef __FUNCT__
2299 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2300 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2301 {
2302   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2303   PetscErrorCode ierr;
2304 
2305   PetscFunctionBegin;
2306   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2307   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2308   PetscFunctionReturn(0);
2309 }
2310 EXTERN_C_END
2311 
2312 EXTERN_C_BEGIN
2313 #undef __FUNCT__
2314 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2315 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2316 {
2317   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2318   PetscErrorCode ierr;
2319 
2320   PetscFunctionBegin;
2321   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2322   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2323   PetscFunctionReturn(0);
2324 }
2325 EXTERN_C_END
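
/*
   Usage sketch for the store/retrieve pair above (hypothetical assembled
   matrix mat): freeze the nonzero pattern, save the numerical values, and
   restore them later, e.g. around an operation that overwrites them:

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... code that changes the values in mat ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/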
2326 
2327 #include "petscpc.h"
2328 EXTERN_C_BEGIN
2329 #undef __FUNCT__
2330 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2331 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2332 {
2333   Mat_MPIAIJ     *b;
2334   PetscErrorCode ierr;
2335   PetscInt       i;
2336 
2337   PetscFunctionBegin;
2338   B->preallocated = PETSC_TRUE;
2339   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2340   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2341   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2342   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2343 
2344   B->rmap.bs = B->cmap.bs = 1;
2345   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2346   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2347   if (d_nnz) {
2348     for (i=0; i<B->rmap.n; i++) {
2349       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2350     }
2351   }
2352   if (o_nnz) {
2353     for (i=0; i<B->rmap.n; i++) {
2354       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2355     }
2356   }
2357   b = (Mat_MPIAIJ*)B->data;
2358 
2359   /* Explicitly create 2 MATSEQAIJ matrices. */
2360   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2361   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2362   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2363   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2364   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2365   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2366   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2367   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2368 
2369   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2370   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2371 
2372   PetscFunctionReturn(0);
2373 }
2374 EXTERN_C_END
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2378 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2379 {
2380   Mat            mat;
2381   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2382   PetscErrorCode ierr;
2383 
2384   PetscFunctionBegin;
2385   *newmat       = 0;
2386   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2387   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2388   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2389   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2390   a    = (Mat_MPIAIJ*)mat->data;
2391 
2392   mat->factor       = matin->factor;
2393   mat->rmap.bs      = matin->rmap.bs;
2394   mat->assembled    = PETSC_TRUE;
2395   mat->insertmode   = NOT_SET_VALUES;
2396   mat->preallocated = PETSC_TRUE;
2397 
2398   a->size           = oldmat->size;
2399   a->rank           = oldmat->rank;
2400   a->donotstash     = oldmat->donotstash;
2401   a->roworiented    = oldmat->roworiented;
2402   a->rowindices     = 0;
2403   a->rowvalues      = 0;
2404   a->getrowactive   = PETSC_FALSE;
2405 
2406   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2407   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2408 
2409   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2410   if (oldmat->colmap) {
2411 #if defined (PETSC_USE_CTABLE)
2412     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2413 #else
2414     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2415     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2416     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2417 #endif
2418   } else a->colmap = 0;
2419   if (oldmat->garray) {
2420     PetscInt len;
2421     len  = oldmat->B->cmap.n;
2422     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2423     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2424     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2425   } else a->garray = 0;
2426 
2427   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2428   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2429   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2430   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2431   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2432   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2433   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2434   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2435   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2436   *newmat = mat;
2437   PetscFunctionReturn(0);
2438 }
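
/*
   Usage sketch (hypothetical matrix A): the public MatDuplicate() drives the
   routine above.  MAT_COPY_VALUES copies the numerical values as well, while
   MAT_DO_NOT_COPY_VALUES duplicates only the nonzero structure:

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/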
2439 
2440 #include "petscsys.h"
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatLoad_MPIAIJ"
2444 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2445 {
2446   Mat            A;
2447   PetscScalar    *vals,*svals;
2448   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2449   MPI_Status     status;
2450   PetscErrorCode ierr;
2451   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2452   PetscInt       i,nz,j,rstart,rend,mmax;
2453   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2454   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2455   PetscInt       cend,cstart,n,*rowners;
2456   int            fd;
2457 
2458   PetscFunctionBegin;
2459   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2460   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2461   if (!rank) {
2462     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2463     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2464     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2465   }
2466 
2467   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2468   M = header[1]; N = header[2];
2469   /* determine ownership of all rows */
2470   m    = M/size + ((M % size) > rank);
2471   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2472   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2473 
2474   /* First process needs enough room for the process with the most rows */
2475   if (!rank) {
2476     mmax       = rowners[1];
2477     for (i=2; i<size; i++) {
2478       mmax = PetscMax(mmax,rowners[i]);
2479     }
2480   } else mmax = m;
2481 
2482   rowners[0] = 0;
2483   for (i=2; i<=size; i++) {
2484     rowners[i] += rowners[i-1];
2485   }
2486   rstart = rowners[rank];
2487   rend   = rowners[rank+1];
2488 
2489   /* distribute row lengths to all processors */
2490   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2491   if (!rank) {
2492     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2493     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2494     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2495     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2496     for (j=0; j<m; j++) {
2497       procsnz[0] += ourlens[j];
2498     }
2499     for (i=1; i<size; i++) {
2500       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2501       /* calculate the number of nonzeros on each processor */
2502       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2503         procsnz[i] += rowlengths[j];
2504       }
2505       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2506     }
2507     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2508   } else {
2509     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2510   }
2511 
2512   if (!rank) {
2513     /* determine max buffer needed and allocate it */
2514     maxnz = 0;
2515     for (i=0; i<size; i++) {
2516       maxnz = PetscMax(maxnz,procsnz[i]);
2517     }
2518     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2519 
2520     /* read in my part of the matrix column indices  */
2521     nz   = procsnz[0];
2522     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2523     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2524 
2525     /* read in everyone else's portion and ship it off */
2526     for (i=1; i<size; i++) {
2527       nz   = procsnz[i];
2528       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2529       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2530     }
2531     ierr = PetscFree(cols);CHKERRQ(ierr);
2532   } else {
2533     /* determine buffer space needed for message */
2534     nz = 0;
2535     for (i=0; i<m; i++) {
2536       nz += ourlens[i];
2537     }
2538     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2539 
2540     /* receive message of column indices */
2541     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2542     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2543     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2544   }
2545 
2546   /* determine column ownership if matrix is not square */
2547   if (N != M) {
2548     n      = N/size + ((N % size) > rank);
2549     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2550     cstart = cend - n;
2551   } else {
2552     cstart = rstart;
2553     cend   = rend;
2554     n      = cend - cstart;
2555   }
2556 
2557   /* loop over local rows, determining number of off diagonal entries */
2558   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2559   jj = 0;
2560   for (i=0; i<m; i++) {
2561     for (j=0; j<ourlens[i]; j++) {
2562       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2563       jj++;
2564     }
2565   }
2566 
2567   /* create our matrix */
2568   for (i=0; i<m; i++) {
2569     ourlens[i] -= offlens[i];
2570   }
2571   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2572   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2573   ierr = MatSetType(A,type);CHKERRQ(ierr);
2574   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2575 
2576   ierr = MatSetOption(A,MAT_COLUMNS_SORTED,PETSC_TRUE);CHKERRQ(ierr);
2577   for (i=0; i<m; i++) {
2578     ourlens[i] += offlens[i];
2579   }
2580 
2581   if (!rank) {
2582     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2583 
2584     /* read in my part of the matrix numerical values  */
2585     nz   = procsnz[0];
2586     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2587 
2588     /* insert into matrix */
2589     jj      = rstart;
2590     smycols = mycols;
2591     svals   = vals;
2592     for (i=0; i<m; i++) {
2593       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2594       smycols += ourlens[i];
2595       svals   += ourlens[i];
2596       jj++;
2597     }
2598 
2599     /* read in other processors and ship out */
2600     for (i=1; i<size; i++) {
2601       nz   = procsnz[i];
2602       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2603       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2604     }
2605     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2606   } else {
2607     /* receive numeric values */
2608     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2609 
2610     /* receive message of values */
2611     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2612     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2613     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2614 
2615     /* insert into matrix */
2616     jj      = rstart;
2617     smycols = mycols;
2618     svals   = vals;
2619     for (i=0; i<m; i++) {
2620       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2621       smycols += ourlens[i];
2622       svals   += ourlens[i];
2623       jj++;
2624     }
2625   }
2626   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2627   ierr = PetscFree(vals);CHKERRQ(ierr);
2628   ierr = PetscFree(mycols);CHKERRQ(ierr);
2629   ierr = PetscFree(rowners);CHKERRQ(ierr);
2630 
2631   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2632   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2633   *newmat = A;
2634   PetscFunctionReturn(0);
2635 }
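
/*
   Usage sketch (file name hypothetical): MatLoad_MPIAIJ() is reached through
   the public MatLoad() with a binary viewer; process 0 reads the file and
   ships each process its rows, as implemented above:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/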
2636 
2637 #undef __FUNCT__
2638 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2639 /*
2640     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2641   submatrix on each process, and then the end result assembled by concatenating
2642   those local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2643 */
2644 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2645 {
2646   PetscErrorCode ierr;
2647   PetscMPIInt    rank,size;
2648   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2649   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2650   Mat            *local,M,Mreuse;
2651   PetscScalar    *vwork,*aa;
2652   MPI_Comm       comm = mat->comm;
2653   Mat_SeqAIJ     *aij;
2654 
2655 
2656   PetscFunctionBegin;
2657   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2658   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2659 
2660   if (call ==  MAT_REUSE_MATRIX) {
2661     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2662     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2663     local = &Mreuse;
2664     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2665   } else {
2666     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2667     Mreuse = *local;
2668     ierr   = PetscFree(local);CHKERRQ(ierr);
2669   }
2670 
2671   /*
2672       m - number of local rows
2673       n - number of columns (same on all processors)
2674       rstart - first row in new global matrix generated
2675   */
2676   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2677   if (call == MAT_INITIAL_MATRIX) {
2678     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2679     ii  = aij->i;
2680     jj  = aij->j;
2681 
2682     /*
2683         Determine the number of non-zeros in the diagonal and off-diagonal
2684         portions of the matrix in order to do correct preallocation
2685     */
2686 
2687     /* first get start and end of "diagonal" columns */
2688     if (csize == PETSC_DECIDE) {
2689       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2690       if (mglobal == n) { /* square matrix */
2691         nlocal = m;
2692       } else {
2693         nlocal = n/size + ((n % size) > rank);
2694       }
2695     } else {
2696       nlocal = csize;
2697     }
2698     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2699     rstart = rend - nlocal;
2700     if (rank == size - 1 && rend != n) {
2701       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2702     }
2703 
2704     /* next, compute all the lengths */
2705     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2706     olens = dlens + m;
2707     for (i=0; i<m; i++) {
2708       jend = ii[i+1] - ii[i];
2709       olen = 0;
2710       dlen = 0;
2711       for (j=0; j<jend; j++) {
2712         if (*jj < rstart || *jj >= rend) olen++;
2713         else dlen++;
2714         jj++;
2715       }
2716       olens[i] = olen;
2717       dlens[i] = dlen;
2718     }
2719     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2720     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2721     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2722     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2723     ierr = PetscFree(dlens);CHKERRQ(ierr);
2724   } else {
2725     PetscInt ml,nl;
2726 
2727     M = *newmat;
2728     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2729     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2730     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2731     /*
2732          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2733        rather than the slower MatSetValues().
2734     */
2735     M->was_assembled = PETSC_TRUE;
2736     M->assembled     = PETSC_FALSE;
2737   }
2738   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2739   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2740   ii  = aij->i;
2741   jj  = aij->j;
2742   aa  = aij->a;
2743   for (i=0; i<m; i++) {
2744     row   = rstart + i;
2745     nz    = ii[i+1] - ii[i];
2746     cwork = jj;     jj += nz;
2747     vwork = aa;     aa += nz;
2748     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2749   }
2750 
2751   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2752   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2753   *newmat = M;
2754 
2755   /* save submatrix used in processor for next request */
2756   if (call ==  MAT_INITIAL_MATRIX) {
2757     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2758     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2759   }
2760 
2761   PetscFunctionReturn(0);
2762 }
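
/*
   Usage sketch (index sets hypothetical): the public MatGetSubMatrix()
   dispatches here.  Each process supplies the rows and columns it wants and
   receives its share of the new parallel matrix; MAT_REUSE_MATRIX is legal
   only when an earlier MAT_INITIAL_MATRIX call attached the "SubMatrix"
   object queried above:

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(mat->comm,nloc,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(mat->comm,nloc,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/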
2763 
2764 EXTERN_C_BEGIN
2765 #undef __FUNCT__
2766 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2767 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2768 {
2769   PetscInt       m,cstart, cend,j,nnz,i,d;
2770   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2771   const PetscInt *JJ;
2772   PetscScalar    *values;
2773   PetscErrorCode ierr;
2774 
2775   PetscFunctionBegin;
2776   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2777 
2778   B->rmap.bs = B->cmap.bs = 1;
2779   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2780   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2781   m      = B->rmap.n;
2782   cstart = B->cmap.rstart;
2783   cend   = B->cmap.rend;
2784   rstart = B->rmap.rstart;
2785 
2786   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2787   o_nnz = d_nnz + m;
2788 
2789   for (i=0; i<m; i++) {
2790     nnz     = Ii[i+1]- Ii[i];
2791     JJ      = J + Ii[i];
2792     nnz_max = PetscMax(nnz_max,nnz);
2793     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns (%D)",i,nnz);
2794     for (j=0; j<nnz; j++) {
2795       if (*JJ >= cstart) break;
2796       JJ++;
2797     }
2798     d = 0;
2799     for (; j<nnz; j++) {
2800       if (*JJ++ >= cend) break;
2801       d++;
2802     }
2803     d_nnz[i] = d;
2804     o_nnz[i] = nnz - d;
2805   }
2806   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2807   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2808 
2809   if (v) values = (PetscScalar*)v;
2810   else {
2811     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2812     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2813   }
2814 
2815   ierr = MatSetOption(B,MAT_COLUMNS_SORTED,PETSC_TRUE);CHKERRQ(ierr);
2816   for (i=0; i<m; i++) {
2817     ii   = i + rstart;
2818     nnz  = Ii[i+1]- Ii[i];
2819     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2820   }
2821   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2822   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2823   ierr = MatSetOption(B,MAT_COLUMNS_SORTED,PETSC_FALSE);CHKERRQ(ierr);
2824 
2825   if (!v) {
2826     ierr = PetscFree(values);CHKERRQ(ierr);
2827   }
2828   PetscFunctionReturn(0);
2829 }
2830 EXTERN_C_END
2831 
2832 #undef __FUNCT__
2833 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2834 /*@
2835    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2836    (the default parallel PETSc format).
2837 
2838    Collective on MPI_Comm
2839 
2840    Input Parameters:
2841 +  B - the matrix
2842 .  i - the indices into j for the start of each local row (starts with zero)
2843 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2844 -  v - optional values in the matrix
2845 
2846    Level: developer
2847 
2848    Notes: this actually copies the values from i[], j[], and v[] into PETSc's internal
2849      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2850 
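   Example usage (a minimal sketch; the two local rows and their values are
   illustrative only, not taken from any particular application):
.vb
   PetscInt    i[] = {0,2,3};          /* row 0 has 2 nonzeros, row 1 has 1 */
   PetscInt    j[] = {0,5,3};          /* global column indices, sorted within each row */
   PetscScalar v[] = {2.0,-1.0,4.0};
   ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve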
2851 .keywords: matrix, aij, compressed row, sparse, parallel
2852 
2853 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2854           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2855 @*/
2856 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2857 {
2858   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2859 
2860   PetscFunctionBegin;
2861   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2862   if (f) {
2863     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2864   }
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2870 /*@C
2871    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2872    (the default parallel PETSc format).  For good matrix assembly performance
2873    the user should preallocate the matrix storage by setting the parameters
2874    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2875    performance can be increased by more than a factor of 50.
2876 
2877    Collective on MPI_Comm
2878 
2879    Input Parameters:
2880 +  A - the matrix
2881 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2882            (same value is used for all local rows)
2883 .  d_nnz - array containing the number of nonzeros in the various rows of the
2884            DIAGONAL portion of the local submatrix (possibly different for each row)
2885            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2886            The size of this array is equal to the number of local rows, i.e., 'm'.
2887            You must leave room for the diagonal entry even if it is zero.
2888 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2889            submatrix (same value is used for all local rows).
2890 -  o_nnz - array containing the number of nonzeros in the various rows of the
2891            OFF-DIAGONAL portion of the local submatrix (possibly different for
2892            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2893            structure. The size of this array is equal to the number
2894            of local rows, i.e., 'm'.
2895 
2896    If the *_nnz parameter is given then the *_nz parameter is ignored
2897 
2898    The AIJ format (also called the Yale sparse matrix format or
2899    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2900    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2901 
2902    The parallel matrix is partitioned such that the first m0 rows belong to
2903    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2904    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2905 
2906    The DIAGONAL portion of the local submatrix of a processor can be defined
2907    as the submatrix which is obtained by extracting the part corresponding
2908    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2909    first row that belongs to the processor, and r2 is the last row belonging
2910    to this processor. This is a square m x m matrix. The remaining portion
2911    of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
2912 
2913    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2914 
2915    Example usage:
2916 
2917    Consider the following 8x8 matrix with 34 non-zero values, that is
2918    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2919    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2920    as follows:
2921 
2922 .vb
2923             1  2  0  |  0  3  0  |  0  4
2924     Proc0   0  5  6  |  7  0  0  |  8  0
2925             9  0 10  | 11  0  0  | 12  0
2926     -------------------------------------
2927            13  0 14  | 15 16 17  |  0  0
2928     Proc1   0 18  0  | 19 20 21  |  0  0
2929             0  0  0  | 22 23  0  | 24  0
2930     -------------------------------------
2931     Proc2  25 26 27  |  0  0 28  | 29  0
2932            30  0  0  | 31 32 33  |  0 34
2933 .ve
2934 
2935    This can be represented as a collection of submatrices as:
2936 
2937 .vb
2938       A B C
2939       D E F
2940       G H I
2941 .ve
2942 
2943    Where the submatrices A,B,C are owned by proc0, D,E,F are
2944    owned by proc1, G,H,I are owned by proc2.
2945 
2946    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2947    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2948    The 'M','N' parameters are 8,8, and have the same values on all procs.
2949 
2950    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2951    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2952    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2953    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2954    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2955    matrix, and [DF] as another SeqAIJ matrix.
2956 
2957    When d_nz, o_nz parameters are specified, d_nz storage elements are
2958    allocated for every row of the local diagonal submatrix, and o_nz
2959    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2960    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2961    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2962    In this case, the values of d_nz,o_nz are:
2963 .vb
2964      proc0 : d_nz = 2, o_nz = 2
2965      proc1 : d_nz = 3, o_nz = 2
2966      proc2 : d_nz = 1, o_nz = 4
2967 .ve
2968    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2969    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2970    for proc2. That is, we are using 12+15+10=37 storage locations to store
2971    34 values.
2972 
2973    When d_nnz, o_nnz parameters are specified, the storage is specified
2974    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2975    In the above case the values for d_nnz,o_nnz are:
2976 .vb
2977      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2978      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2979      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2980 .ve
2981    Here the space allocated is the sum of all the above values, i.e., 34, and
2982    hence the preallocation is exact.
2983 
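   For instance, the calling sequence on proc0 for the example above would be
   (a sketch, with the communicator comm assumed):
.vb
   PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve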
2984    Level: intermediate
2985 
2986 .keywords: matrix, aij, compressed row, sparse, parallel
2987 
2988 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2989           MPIAIJ
2990 @*/
2991 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2992 {
2993   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2994 
2995   PetscFunctionBegin;
2996   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2997   if (f) {
2998     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2999   }
3000   PetscFunctionReturn(0);
3001 }
3002 
3003 #undef __FUNCT__
3004 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3005 /*@C
3006      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3007          the local rows in standard CSR format.
3008 
3009    Collective on MPI_Comm
3010 
3011    Input Parameters:
3012 +  comm - MPI communicator
3013 .  m - number of local rows (Cannot be PETSC_DECIDE)
3014 .  n - This value should be the same as the local size used in creating the
3015        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3016        it calculated if N is given). For square matrices n is almost always m.
3017 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3018 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3019 .   i - row indices
3020 .   j - column indices
3021 -   a - matrix values
3022 
3023    Output Parameter:
3024 .   mat - the matrix
3025 
3026    Level: intermediate
3027 
3028    Notes:
3029        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3030      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3031      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3032 
3033        The i and j indices are 0 based
3034 
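   A typical call (sketch), where i, j, and a describe the m local rows owned by
   this process:
.vb
   ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve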
3035 .keywords: matrix, aij, compressed row, sparse, parallel
3036 
3037 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3038           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3039 @*/
3040 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3041 {
3042   PetscErrorCode ierr;
3043 
3044   PetscFunctionBegin;
3045   if (i[0]) {
3046     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3047   }
3048   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3049   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3050   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3051   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 #undef __FUNCT__
3056 #define __FUNCT__ "MatCreateMPIAIJ"
3057 /*@C
3058    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3059    (the default parallel PETSc format).  For good matrix assembly performance
3060    the user should preallocate the matrix storage by setting the parameters
3061    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3062    performance can be increased by more than a factor of 50.
3063 
3064    Collective on MPI_Comm
3065 
3066    Input Parameters:
3067 +  comm - MPI communicator
3068 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3069            This value should be the same as the local size used in creating the
3070            y vector for the matrix-vector product y = Ax.
3071 .  n - This value should be the same as the local size used in creating the
3072        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3073        it calculated if N is given). For square matrices n is almost always m.
3074 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3075 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3076 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3077            (same value is used for all local rows)
3078 .  d_nnz - array containing the number of nonzeros in the various rows of the
3079            DIAGONAL portion of the local submatrix (possibly different for each row)
3080            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3081            The size of this array is equal to the number of local rows, i.e., 'm'.
3082            You must leave room for the diagonal entry even if it is zero.
3083 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3084            submatrix (same value is used for all local rows).
3085 -  o_nnz - array containing the number of nonzeros in the various rows of the
3086            OFF-DIAGONAL portion of the local submatrix (possibly different for
3087            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3088            structure. The size of this array is equal to the number
3089            of local rows, i.e., 'm'.
3090 
3091    Output Parameter:
3092 .  A - the matrix
3093 
3094    Notes:
3095    If the *_nnz parameter is given then the *_nz parameter is ignored
3096 
3097    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3098    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3099    storage requirements for this matrix.
3100 
3101    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3102    processor then it must be used on all processors that share the object for
3103    that argument.
3104 
3105    The user MUST specify either the local or global matrix dimensions
3106    (possibly both).
3107 
3108    The parallel matrix is partitioned across processors such that the
3109    first m0 rows belong to process 0, the next m1 rows belong to
3110    process 1, the next m2 rows belong to process 2, etc., where
3111    m0,m1,m2,... are given by the input parameter 'm'. That is, each processor stores
3112    values corresponding to an [m x N] submatrix.
3113 
3114    The columns are logically partitioned with the n0 columns belonging
3115    to the 0th partition, the next n1 columns belonging to the next
3116    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3117 
3118    The DIAGONAL portion of the local submatrix on any given processor
3119    is the submatrix formed from the rows and columns owned by that
3120    processor, i.e., the diagonal submatrix on process 0 is [m0 x n0],
3121    on process 1 it is [m1 x n1], etc. The remaining portion of the
3122    local submatrix, [m x (N-n)], constitutes the OFF-DIAGONAL portion.
3123    The example below illustrates this concept.
3125 
3126    For a square global matrix we define each processor's diagonal portion
3127    to be its local rows and the corresponding columns (a square submatrix);
3128    each processor's off-diagonal portion encompasses the remainder of the
3129    local matrix (a rectangular submatrix).
3130 
3131    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3132 
3133    When calling this routine with a single process communicator, a matrix of
3134    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3135    type of communicator, use the construction mechanism:
3136      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3137 
3138    By default, this format uses inodes (identical nodes) when possible.
3139    We search for consecutive rows with the same nonzero structure, thereby
3140    reusing matrix information to achieve increased efficiency.
3141 
3142    Options Database Keys:
3143 +  -mat_no_inode  - Do not use inodes
3144 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3145 -  -mat_aij_oneindex - Internally use indexing starting at 1
3146         rather than 0.  Note that when calling MatSetValues(),
3147         the user still MUST index entries starting at 0!
3148 
3149 
3150    Example usage:
3151 
3152    Consider the following 8x8 matrix with 34 non-zero values, that is
3153    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3154    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3155    as follows:
3156 
3157 .vb
3158             1  2  0  |  0  3  0  |  0  4
3159     Proc0   0  5  6  |  7  0  0  |  8  0
3160             9  0 10  | 11  0  0  | 12  0
3161     -------------------------------------
3162            13  0 14  | 15 16 17  |  0  0
3163     Proc1   0 18  0  | 19 20 21  |  0  0
3164             0  0  0  | 22 23  0  | 24  0
3165     -------------------------------------
3166     Proc2  25 26 27  |  0  0 28  | 29  0
3167            30  0  0  | 31 32 33  |  0 34
3168 .ve
3169 
3170    This can be represented as a collection of submatrices as:
3171 
3172 .vb
3173       A B C
3174       D E F
3175       G H I
3176 .ve
3177 
3178    Where the submatrices A,B,C are owned by proc0, D,E,F are
3179    owned by proc1, G,H,I are owned by proc2.
3180 
3181    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3182    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3183    The 'M','N' parameters are 8,8, and have the same values on all procs.
3184 
3185    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3186    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3187    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3188    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3189    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3190    matrix, and [DF] as another SeqAIJ matrix.
3191 
3192    When d_nz, o_nz parameters are specified, d_nz storage elements are
3193    allocated for every row of the local diagonal submatrix, and o_nz
3194    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3195    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3196    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3197    In this case, the values of d_nz,o_nz are:
3198 .vb
3199      proc0 : d_nz = 2, o_nz = 2
3200      proc1 : d_nz = 3, o_nz = 2
3201      proc2 : d_nz = 1, o_nz = 4
3202 .ve
3203    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3204    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3205    for proc2. That is, we are using 12+15+10=37 storage locations to store
3206    34 values.
3207 
3208    When d_nnz, o_nnz parameters are specified, the storage is specified
3209    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3210    In the above case the values for d_nnz,o_nnz are:
3211 .vb
3212      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3213      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3214      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3215 .ve
3216    Here the space allocated is the sum of all the above values, i.e., 34, and
3217    hence the preallocation is exact.
3218 
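   For instance, the single call on proc1 for the example above would be (a sketch):
.vb
   PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve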
3219    Level: intermediate
3220 
3221 .keywords: matrix, aij, compressed row, sparse, parallel
3222 
3223 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3224           MPIAIJ, MatCreateMPIAIJWithArrays()
3225 @*/
3226 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3227 {
3228   PetscErrorCode ierr;
3229   PetscMPIInt    size;
3230 
3231   PetscFunctionBegin;
3232   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3233   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3234   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3235   if (size > 1) {
3236     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3237     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3238   } else {
3239     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3240     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3241   }
3242   PetscFunctionReturn(0);
3243 }
3244 
3245 #undef __FUNCT__
3246 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
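/*
   MatMPIAIJGetSeqAIJ - returns the diagonal block Ad and the off-diagonal block Ao
   (both stored as SeqAIJ matrices), together with colmap, the array mapping the
   local column numbers of Ao to global column numbers.
*/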
3247 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3248 {
3249   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3250 
3251   PetscFunctionBegin;
3252   *Ad     = a->A;
3253   *Ao     = a->B;
3254   *colmap = a->garray;
3255   PetscFunctionReturn(0);
3256 }
3257 
3258 #undef __FUNCT__
3259 #define __FUNCT__ "MatSetColoring_MPIAIJ"
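/*
   MatSetColoring_MPIAIJ - pushes a coloring down to the two SeqAIJ pieces of the
   parallel matrix. The diagonal block receives the local colors directly; the
   off-diagonal block needs the colors of its ghost columns, obtained either by
   gathering all colors (IS_COLORING_GLOBAL) or through the matrix's
   global-to-local mapping (IS_COLORING_GHOSTED).
*/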
3260 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3261 {
3262   PetscErrorCode ierr;
3263   PetscInt       i;
3264   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3265 
3266   PetscFunctionBegin;
3267   if (coloring->ctype == IS_COLORING_GLOBAL) {
3268     ISColoringValue *allcolors,*colors;
3269     ISColoring      ocoloring;
3270 
3271     /* set coloring for diagonal portion */
3272     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3273 
3274     /* set coloring for off-diagonal portion */
3275     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3276     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3277     for (i=0; i<a->B->cmap.n; i++) {
3278       colors[i] = allcolors[a->garray[i]];
3279     }
3280     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3281     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3282     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3283     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3284   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3285     ISColoringValue *colors;
3286     PetscInt        *larray;
3287     ISColoring      ocoloring;
3288 
3289     /* set coloring for diagonal portion */
3290     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3291     for (i=0; i<a->A->cmap.n; i++) {
3292       larray[i] = i + A->cmap.rstart;
3293     }
3294     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3295     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3296     for (i=0; i<a->A->cmap.n; i++) {
3297       colors[i] = coloring->colors[larray[i]];
3298     }
3299     ierr = PetscFree(larray);CHKERRQ(ierr);
3300     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3301     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3302     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3303 
3304     /* set coloring for off-diagonal portion */
3305     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3306     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3307     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3308     for (i=0; i<a->B->cmap.n; i++) {
3309       colors[i] = coloring->colors[larray[i]];
3310     }
3311     ierr = PetscFree(larray);CHKERRQ(ierr);
3312     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3313     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3314     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3315   } else {
3316     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3317   }
3318 
3319   PetscFunctionReturn(0);
3320 }
3321 
3322 #if defined(PETSC_HAVE_ADIC)
3323 #undef __FUNCT__
3324 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3325 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3326 {
3327   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3328   PetscErrorCode ierr;
3329 
3330   PetscFunctionBegin;
3331   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3332   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3333   PetscFunctionReturn(0);
3334 }
3335 #endif
3336 
3337 #undef __FUNCT__
3338 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3339 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3340 {
3341   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3342   PetscErrorCode ierr;
3343 
3344   PetscFunctionBegin;
3345   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3346   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3347   PetscFunctionReturn(0);
3348 }
3349 
3350 #undef __FUNCT__
3351 #define __FUNCT__ "MatMerge"
3352 /*@C
3353       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3354                  matrices from each processor
3355 
3356     Collective on MPI_Comm
3357 
3358    Input Parameters:
3359 +    comm - the communicator the parallel matrix will live on
3360 .    inmat - the input sequential matrix (one per process)
3361 .    n - number of local columns (or PETSC_DECIDE)
3362 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3363 
3364    Output Parameter:
3365 .    outmat - the parallel matrix generated
3366 
3367     Level: advanced
3368 
3369    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3370 
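   Example usage (sketch): each process contributes its sequential matrix, and one
   parallel matrix results; note that inmat is destroyed by this routine:
.vb
   ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve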
3371 @*/
3372 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3373 {
3374   PetscErrorCode ierr;
3375   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3376   PetscInt       *indx;
3377   PetscScalar    *values;
3378 
3379   PetscFunctionBegin;
3380   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3381   if (scall == MAT_INITIAL_MATRIX){
3382     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3383     if (n == PETSC_DECIDE){
3384       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3385     }
3386     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3387     rstart -= m;
3388 
3389     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3390     for (i=0;i<m;i++) {
3391       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3392       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3393       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3394     }
3395     /* This routine will ONLY return MPIAIJ type matrix */
3396     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3397     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3398     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3399     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3400     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3401 
3402   } else if (scall == MAT_REUSE_MATRIX){
3403     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3404   } else {
3405     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3406   }
3407 
3408   for (i=0;i<m;i++) {
3409     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3410     Ii    = i + rstart;
3411     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3412     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3413   }
3414   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3415   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3416   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3417 
3418   PetscFunctionReturn(0);
3419 }
3420 
3421 #undef __FUNCT__
3422 #define __FUNCT__ "MatFileSplit"
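/*
   MatFileSplit - writes each process's local rows of A, stored as a sequential
   m x N SeqAIJ matrix, to its own binary file named <outfile>.<rank>
   (opened in append mode).
*/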
3423 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3424 {
3425   PetscErrorCode    ierr;
3426   PetscMPIInt       rank;
3427   PetscInt          m,N,i,rstart,nnz;
3428   size_t            len;
3429   const PetscInt    *indx;
3430   PetscViewer       out;
3431   char              *name;
3432   Mat               B;
3433   const PetscScalar *values;
3434 
3435   PetscFunctionBegin;
3436   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3437   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3438   /* Should this be the type of the diagonal block of A? */
3439   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3440   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3441   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3442   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3443   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3444   for (i=0;i<m;i++) {
3445     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3446     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3447     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3448   }
3449   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3450   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3451 
3452   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3453   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3454   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3455   sprintf(name,"%s.%d",outfile,rank);
3456   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3457   ierr = PetscFree(name);CHKERRQ(ierr);
3458   ierr = MatView(B,out);CHKERRQ(ierr);
3459   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3460   ierr = MatDestroy(B);CHKERRQ(ierr);
3461   PetscFunctionReturn(0);
3462 }
3463 
3464 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3465 #undef __FUNCT__
3466 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3467 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3468 {
3469   PetscErrorCode       ierr;
3470   Mat_Merge_SeqsToMPI  *merge;
3471   PetscContainer       container;
3472 
3473   PetscFunctionBegin;
3474   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3475   if (container) {
3476     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3477     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3478     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3479     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3480     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3481     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3482     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3483     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3484     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3485     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3486     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3487     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3488 
3489     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3490     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3491     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container exists */
3492   }
3493 
3494   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3495   PetscFunctionReturn(0);
3496 }
3497 
3498 #include "src/mat/utils/freespace.h"
3499 #include "petscbt.h"
3500 static PetscEvent logkey_seqstompinum = 0;
3501 #undef __FUNCT__
3502 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3503 /*@C
3504       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3505                  matrices from each processor
3506 
3507     Collective on MPI_Comm
3508 
3509    Input Parameters:
3510 +    comm - the communicators the parallel matrix will live on
3511 .    seqmat - the input sequential matrices
3512 .    m - number of local rows (or PETSC_DECIDE)
3513 .    n - number of local columns (or PETSC_DECIDE)
3514 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3515 
3516    Output Parameter:
3517 .    mpimat - the parallel matrix generated
3518 
3519     Level: advanced
3520 
3521    Notes:
3522      The dimensions of the sequential matrix in each processor MUST be the same.
3523      The input seqmat is placed in the container "Mat_Merge_SeqsToMPI", and will be
3524      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3525 @*/
3526 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3527 {
3528   PetscErrorCode       ierr;
3529   MPI_Comm             comm=mpimat->comm;
3530   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3531   PetscMPIInt          size,rank,taga,*len_s;
3532   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3533   PetscInt             proc,m;
3534   PetscInt             **buf_ri,**buf_rj;
3535   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3536   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3537   MPI_Request          *s_waits,*r_waits;
3538   MPI_Status           *status;
3539   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3540   Mat_Merge_SeqsToMPI  *merge;
3541   PetscContainer       container;
3542 
3543   PetscFunctionBegin;
3544   if (!logkey_seqstompinum) {
3545     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
3546   }
3547   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3548 
3549   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3550   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3551 
3552   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3553   if (container) {
3554     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3555   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must be created by MatMerge_SeqsToMPISymbolic()");
3556   bi     = merge->bi;
3557   bj     = merge->bj;
3558   buf_ri = merge->buf_ri;
3559   buf_rj = merge->buf_rj;
3560 
3561   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3562   owners = merge->rowmap.range;
3563   len_s  = merge->len_s;
3564 
3565   /* send and recv matrix values */
3566   /*-----------------------------*/
3567   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3568   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3569 
3570   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3571   for (proc=0,k=0; proc<size; proc++){
3572     if (!len_s[proc]) continue;
3573     i = owners[proc];
3574     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3575     k++;
3576   }
3577 
3578   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3579   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3580   ierr = PetscFree(status);CHKERRQ(ierr);
3581 
3582   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3583   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3584 
3585   /* insert mat values of mpimat */
3586   /*----------------------------*/
3587   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3588   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3589   nextrow = buf_ri_k + merge->nrecv;
3590   nextai  = nextrow + merge->nrecv;
3591 
3592   for (k=0; k<merge->nrecv; k++){
3593     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3594     nrows = *(buf_ri_k[k]);
3595     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3596     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3597   }
3598 
3599   /* set values of ba */
3600   m = merge->rowmap.n;
3601   for (i=0; i<m; i++) {
3602     arow = owners[rank] + i;
3603     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3604     bnzi = bi[i+1] - bi[i];
3605     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3606 
3607     /* add local non-zero vals of this proc's seqmat into ba */
3608     anzi = ai[arow+1] - ai[arow];
3609     aj   = a->j + ai[arow];
3610     aa   = a->a + ai[arow];
3611     nextaj = 0;
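    /* bj_i holds the sorted union of all contributions for this row (built in the
       symbolic phase), so every aj[] entry is matched, in order, while scanning bj_i */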
3612     for (j=0; nextaj<anzi; j++){
3613       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3614         ba_i[j] += aa[nextaj++];
3615       }
3616     }
3617 
3618     /* add received vals into ba */
3619     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3620       /* i-th row */
3621       if (i == *nextrow[k]) {
3622         anzi = *(nextai[k]+1) - *nextai[k];
3623         aj   = buf_rj[k] + *(nextai[k]);
3624         aa   = abuf_r[k] + *(nextai[k]);
3625         nextaj = 0;
3626         for (j=0; nextaj<anzi; j++){
3627           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3628             ba_i[j] += aa[nextaj++];
3629           }
3630         }
3631         nextrow[k]++; nextai[k]++;
3632       }
3633     }
3634     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3635   }
3636   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638 
3639   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3640   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3641   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3642   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3643   PetscFunctionReturn(0);
3644 }
3645 
3646 static PetscEvent logkey_seqstompisym = 0;
3647 #undef __FUNCT__
3648 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3649 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3650 {
3651   PetscErrorCode       ierr;
3652   Mat                  B_mpi;
3653   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3654   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3655   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3656   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3657   PetscInt             len,proc,*dnz,*onz;
3658   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3659   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3660   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3661   MPI_Status           *status;
3662   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3663   PetscBT              lnkbt;
3664   Mat_Merge_SeqsToMPI  *merge;
3665   PetscContainer       container;
3666 
3667   PetscFunctionBegin;
3668   if (!logkey_seqstompisym) {
3669     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3670   }
3671   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3672 
3673   /* make sure it is a PETSc comm */
3674   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3675   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3676   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3677 
3678   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3679   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3680 
3681   /* determine row ownership */
3682   /*---------------------------------------------------------*/
3683   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3684   merge->rowmap.n = m;
3685   merge->rowmap.N = M;
3686   merge->rowmap.bs = 1;
3687   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3688   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3689   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3690 
3691   m      = merge->rowmap.n;
3692   M      = merge->rowmap.N;
3693   owners = merge->rowmap.range;
3694 
3695   /* determine the number of messages to send, their lengths */
3696   /*---------------------------------------------------------*/
3697   len_s  = merge->len_s;
3698 
3699   len = 0;  /* length of buf_si[] */
3700   merge->nsend = 0;
3701   for (proc=0; proc<size; proc++){
3702     len_si[proc] = 0;
3703     if (proc == rank){
3704       len_s[proc] = 0;
3705     } else {
3706       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3707       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3708     }
3709     if (len_s[proc]) {
3710       merge->nsend++;
3711       nrows = 0;
3712       for (i=owners[proc]; i<owners[proc+1]; i++){
3713         if (ai[i+1] > ai[i]) nrows++;
3714       }
3715       len_si[proc] = 2*(nrows+1);
3716       len += len_si[proc];
3717     }
3718   }
3719 
3720   /* determine the number and length of messages to receive for ij-structure */
3721   /*-------------------------------------------------------------------------*/
3722   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3723   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3724 
3725   /* post the Irecv of j-structure */
3726   /*-------------------------------*/
3727   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3728   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3729 
3730   /* post the Isend of j-structure */
3731   /*--------------------------------*/
3732   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3733   sj_waits = si_waits + merge->nsend;
3734 
3735   for (proc=0, k=0; proc<size; proc++){
3736     if (!len_s[proc]) continue;
3737     i = owners[proc];
3738     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3739     k++;
3740   }
3741 
3742   /* receives and sends of j-structure are complete */
3743   /*------------------------------------------------*/
3744   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3745   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3746 
3747   /* send and recv i-structure */
3748   /*---------------------------*/
3749   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3750   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3751 
3752   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3753   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3754   for (proc=0,k=0; proc<size; proc++){
3755     if (!len_s[proc]) continue;
3756     /* form outgoing message for i-structure:
3757          buf_si[0]:                 nrows to be sent
3758                [1:nrows]:           row index (global)
3759                [nrows+1:2*nrows+1]: i-structure index
3760     */
3761     /*-------------------------------------------*/
3762     nrows = len_si[proc]/2 - 1;
3763     buf_si_i    = buf_si + nrows+1;
3764     buf_si[0]   = nrows;
3765     buf_si_i[0] = 0;
3766     nrows = 0;
3767     for (i=owners[proc]; i<owners[proc+1]; i++){
3768       anzi = ai[i+1] - ai[i];
3769       if (anzi) {
3770         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3771         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3772         nrows++;
3773       }
3774     }
3775     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3776     k++;
3777     buf_si += len_si[proc];
3778   }
3779 
3780   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3781   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3782 
3783   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3784   for (i=0; i<merge->nrecv; i++){
3785     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3786   }
3787 
3788   ierr = PetscFree(len_si);CHKERRQ(ierr);
3789   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3790   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3791   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3792   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3793   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3794   ierr = PetscFree(status);CHKERRQ(ierr);
3795 
3796   /* compute a local seq matrix in each processor */
3797   /*----------------------------------------------*/
3798   /* allocate bi array and free space for accumulating nonzero column info */
3799   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3800   bi[0] = 0;
3801 
3802   /* create and initialize a linked list */
3803   nlnk = N+1;
3804   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3805 
3806   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3807   len = ai[owners[rank+1]] - ai[owners[rank]];
3809   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3810   current_space = free_space;
3811 
3812   /* determine symbolic info for each local row */
3813   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3814   nextrow = buf_ri_k + merge->nrecv;
3815   nextai  = nextrow + merge->nrecv;
3816   for (k=0; k<merge->nrecv; k++){
3817     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3818     nrows = *buf_ri_k[k];
3819     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3820     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3821   }
3822 
3823   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3824   len = 0;
3825   for (i=0;i<m;i++) {
3826     bnzi   = 0;
3827     /* add local non-zero cols of this proc's seqmat into lnk */
3828     arow   = owners[rank] + i;
3829     anzi   = ai[arow+1] - ai[arow];
3830     aj     = a->j + ai[arow];
3831     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3832     bnzi += nlnk;
3833     /* add received col data into lnk */
3834     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3835       if (i == *nextrow[k]) { /* i-th row */
3836         anzi = *(nextai[k]+1) - *nextai[k];
3837         aj   = buf_rj[k] + *nextai[k];
3838         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3839         bnzi += nlnk;
3840         nextrow[k]++; nextai[k]++;
3841       }
3842     }
3843     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3844 
3845     /* if free space is not available, make more free space */
3846     if (current_space->local_remaining<bnzi) {
3847       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3848       nspacedouble++;
3849     }
3850     /* copy data into free space, then initialize lnk */
3851     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3852     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3853 
3854     current_space->array           += bnzi;
3855     current_space->local_used      += bnzi;
3856     current_space->local_remaining -= bnzi;
3857 
3858     bi[i+1] = bi[i] + bnzi;
3859   }
3860 
3861   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3862 
3863   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3864   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3865   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3866 
3867   /* create symbolic parallel matrix B_mpi */
3868   /*---------------------------------------*/
3869   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3870   if (n==PETSC_DECIDE) {
3871     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3872   } else {
3873     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3874   }
3875   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3876   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3877   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3878 
3879   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3880   B_mpi->assembled     = PETSC_FALSE;
3881   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3882   merge->bi            = bi;
3883   merge->bj            = bj;
3884   merge->buf_ri        = buf_ri;
3885   merge->buf_rj        = buf_rj;
3886   merge->coi           = PETSC_NULL;
3887   merge->coj           = PETSC_NULL;
3888   merge->owners_co     = PETSC_NULL;
3889 
3890   /* attach the supporting struct to B_mpi for reuse */
3891   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3892   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3893   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3894   *mpimat = B_mpi;
3895 
3896   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3897   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3898   PetscFunctionReturn(0);
3899 }
3900 
3901 static PetscEvent logkey_seqstompi = 0;
3902 #undef __FUNCT__
3903 #define __FUNCT__ "MatMerge_SeqsToMPI"
3904 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3905 {
3906   PetscErrorCode   ierr;
3907 
3908   PetscFunctionBegin;
3909   if (!logkey_seqstompi) {
3910     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3911   }
3912   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3913   if (scall == MAT_INITIAL_MATRIX){
3914     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3915   }
3916   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3917   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3918   PetscFunctionReturn(0);
3919 }
3920 static PetscEvent logkey_getlocalmat = 0;
3921 #undef __FUNCT__
3922 #define __FUNCT__ "MatGetLocalMat"
3923 /*@C
3924      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of the parallel matrix
3925 
3926     Not Collective
3927 
3928    Input Parameters:
3929 +    A - the matrix
3930 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3931 
3932    Output Parameter:
3933 .    A_loc - the local sequential matrix generated
3934 
3935     Level: developer
3936 
3937 @*/
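   Example usage (sketch); the result is an m x N sequential matrix holding both
   the diagonal and off-diagonal parts of this process's rows:
.vb
   Mat A_loc;
   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
   /* ... if the values of A change, refill with ... */
   ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve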
3938 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3939 {
3940   PetscErrorCode  ierr;
3941   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3942   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3943   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3944   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3945   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3946   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3947 
3948   PetscFunctionBegin;
3949   if (!logkey_getlocalmat) {
3950     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3951   }
3952   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3953   if (scall == MAT_INITIAL_MATRIX){
3954     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3955     ci[0] = 0;
3956     for (i=0; i<am; i++){
3957       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3958     }
3959     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3960     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3961     k = 0;
3962     for (i=0; i<am; i++) {
3963       ncols_o = bi[i+1] - bi[i];
3964       ncols_d = ai[i+1] - ai[i];
3965       /* off-diagonal portion of A */
3966       for (jo=0; jo<ncols_o; jo++) {
3967         col = cmap[*bj];
3968         if (col >= cstart) break;
3969         cj[k]   = col; bj++;
3970         ca[k++] = *ba++;
3971       }
3972       /* diagonal portion of A */
3973       for (j=0; j<ncols_d; j++) {
3974         cj[k]   = cstart + *aj++;
3975         ca[k++] = *aa++;
3976       }
3977       /* off-diagonal portion of A */
3978       for (j=jo; j<ncols_o; j++) {
3979         cj[k]   = cmap[*bj++];
3980         ca[k++] = *ba++;
3981       }
3982     }
3983     /* put together the new matrix */
3984     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3985     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3986     /* Since these are PETSc arrays, change flags to free them as necessary. */
3987     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3988     mat->free_a  = PETSC_TRUE;
3989     mat->free_ij = PETSC_TRUE;
3990     mat->nonew   = 0;
3991   } else if (scall == MAT_REUSE_MATRIX){
3992     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3993     ci = mat->i; cj = mat->j; ca = mat->a;
3994     for (i=0; i<am; i++) {
3995       /* off-diagonal portion of A */
3996       ncols_o = bi[i+1] - bi[i];
3997       for (jo=0; jo<ncols_o; jo++) {
3998         col = cmap[*bj];
3999         if (col >= cstart) break;
4000         *ca++ = *ba++; bj++;
4001       }
4002       /* diagonal portion of A */
4003       ncols_d = ai[i+1] - ai[i];
4004       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4005       /* off-diagonal portion of A */
4006       for (j=jo; j<ncols_o; j++) {
4007         *ca++ = *ba++; bj++;
4008       }
4009     }
4010   } else {
4011     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4012   }
4013 
4014   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4015   PetscFunctionReturn(0);
4016 }
4017 
4018 static PetscEvent logkey_getlocalmatcondensed = 0;
4019 #undef __FUNCT__
4020 #define __FUNCT__ "MatGetLocalMatCondensed"
4021 /*@C
4022      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and only the NON-ZERO columns of the parallel matrix
4023 
4024     Not Collective
4025 
4026    Input Parameters:
4027 +    A - the matrix
4028 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4029 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4030 
4031    Output Parameter:
4032 .    A_loc - the local sequential matrix generated
4033 
4034     Level: developer
4035 
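   Example usage (sketch), taking all local rows and all locally nonzero columns:
.vb
   Mat A_loc;
   ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve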
4036 @*/
4037 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4038 {
4039   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4040   PetscErrorCode    ierr;
4041   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4042   IS                isrowa,iscola;
4043   Mat               *aloc;
4044 
4045   PetscFunctionBegin;
4046   if (!logkey_getlocalmatcondensed) {
4047     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
4048   }
4049   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4050   if (!row){
4051     start = A->rmap.rstart; end = A->rmap.rend;
4052     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4053   } else {
4054     isrowa = *row;
4055   }
4056   if (!col){
4057     start = A->cmap.rstart;
4058     cmap  = a->garray;
4059     nzA   = a->A->cmap.n;
4060     nzB   = a->B->cmap.n;
4061     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4062     ncols = 0;
4063     for (i=0; i<nzB; i++) {
4064       if (cmap[i] < start) idx[ncols++] = cmap[i];
4065       else break;
4066     }
4067     imark = i;
4068     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4069     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4070     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4071     ierr = PetscFree(idx);CHKERRQ(ierr);
4072   } else {
4073     iscola = *col;
4074   }
4075   if (scall != MAT_INITIAL_MATRIX){
4076     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4077     aloc[0] = *A_loc;
4078   }
4079   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4080   *A_loc = aloc[0];
4081   ierr = PetscFree(aloc);CHKERRQ(ierr);
4082   if (!row){
4083     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4084   }
4085   if (!col){
4086     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4087   }
4088   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4089   PetscFunctionReturn(0);
4090 }
4091 
4092 static PetscEvent logkey_GetBrowsOfAcols = 0;
4093 #undef __FUNCT__
4094 #define __FUNCT__ "MatGetBrowsOfAcols"
4095 /*@C
4096     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4097 
4098     Collective on Mat
4099 
4100    Input Parameters:
4101 +    A,B - the matrices in mpiaij format
4102 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4103 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4104 
4105    Output Parameter:
4106 +    rowb, colb - index sets of the rows and columns of B that were extracted (returned if non-null on input)
4107 .    brstart - index of the first row in B_seq that is taken from B's own local rows (the next B->rmap.n rows of B_seq)
4108 -    B_seq - the sequential matrix generated
4109 
4110     Level: developer
4111 
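    Sample usage, a sketch with error checking omitted (A and B are assembled
    MPIAIJ matrices with compatible layouts):
.vb
      IS       rowb,colb;
      Mat      B_seq;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
      ... the values of B change, its nonzero pattern does not ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);
      ... when done ...
      ierr = ISDestroy(rowb); ierr = ISDestroy(colb); ierr = MatDestroy(B_seq);
.ve
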
4112 @*/
4113 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4114 {
4115   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4116   PetscErrorCode    ierr;
4117   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4118   IS                isrowb,iscolb;
4119   Mat               *bseq;
4120 
4121   PetscFunctionBegin;
4122   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4123     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4124   }
4125   if (!logkey_GetBrowsOfAcols) {
4126     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
4127   }
4128   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4129 
4130   if (scall == MAT_INITIAL_MATRIX){
4131     start = A->cmap.rstart;
4132     cmap  = a->garray;
4133     nzA   = a->A->cmap.n;
4134     nzB   = a->B->cmap.n;
4135     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4136     ncols = 0;
4137     for (i=0; i<nzB; i++) {  /* global rows below the local ownership range */
4138       if (cmap[i] < start) idx[ncols++] = cmap[i];
4139       else break;
4140     }
4141     imark = i;
4142     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4143     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows above the local ownership range */
4144     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4145     ierr = PetscFree(idx);CHKERRQ(ierr);
4146     *brstart = imark;
4147     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4148   } else {
4149     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4150     isrowb = *rowb; iscolb = *colb;
4151     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4152     bseq[0] = *B_seq;
4153   }
4154   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4155   *B_seq = bseq[0];
4156   ierr = PetscFree(bseq);CHKERRQ(ierr);
4157   if (!rowb){
4158     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4159   } else {
4160     *rowb = isrowb;
4161   }
4162   if (!colb){
4163     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4164   } else {
4165     *colb = iscolb;
4166   }
4167   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4168   PetscFunctionReturn(0);
4169 }
4170 
4171 static PetscEvent logkey_GetBrowsOfAocols = 0;
4172 #undef __FUNCT__
4173 #define __FUNCT__ "MatGetBrowsOfAoCols"
4174 /*@C
4175     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
4176     columns of the OFF-DIAGONAL portion of local A
4177 
4178     Collective on Mat
4179 
4180    Input Parameters:
4181 +    A,B - the matrices in mpiaij format
4182 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4183 .    startsj - starting points of the messages in B's sending and receiving j- and a-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4184 -    bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4185 
4186    Output Parameter:
4187 .    B_oth - the sequential matrix generated
4188 
4189     Level: developer
4190 
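    Sample usage, a sketch with error checking omitted; startsj and bufa are
    freed by the caller with PetscFree() when no longer needed:
.vb
      PetscInt    *startsj = PETSC_NULL;
      PetscScalar *bufa    = PETSC_NULL;
      Mat         B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
      ... the values of B change, its nonzero pattern does not ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);
.ve
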
4191 @*/
4192 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4193 {
4194   VecScatter_MPI_General *gen_to,*gen_from;
4195   PetscErrorCode         ierr;
4196   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4197   Mat_SeqAIJ             *b_oth;
4198   VecScatter             ctx=a->Mvctx;
4199   MPI_Comm               comm=ctx->comm;
4200   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4201   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4202   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4203   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4204   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4205   MPI_Status             *sstatus,rstatus;
4206   PetscInt               *cols,sbs,rbs;
4207   PetscScalar            *vals;
4208 
4209   PetscFunctionBegin;
4210   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4211     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4212   }
4213   if (!logkey_GetBrowsOfAocols) {
4214     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
4215   }
4216   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4217   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4218 
4219   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4220   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4221   rvalues  = gen_from->values; /* reused below to hold the lengths of the incoming rows */
4222   svalues  = gen_to->values;   /* reused below to hold the lengths of the outgoing rows */
4223   nrecvs   = gen_from->n;
4224   nsends   = gen_to->n;
4225 
4226   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4227   srow     = gen_to->indices;   /* local row index to be sent */
4228   sstarts  = gen_to->starts;
4229   sprocs   = gen_to->procs;
4230   sstatus  = gen_to->sstatus;
4231   sbs      = gen_to->bs;
4232   rstarts  = gen_from->starts;
4233   rprocs   = gen_from->procs;
4234   rbs      = gen_from->bs;
4235 
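  /* The needed rows of B are communicated in three phases, reusing the
     communication pattern of the existing Mvctx scatter: (1) the row lengths
     (i-array), (2) the column indices (j-array), and (3) the numerical
     values (a-array).  For MAT_REUSE_MATRIX only phase (3) is repeated,
     using the message layout saved in startsj and the send buffer saved in
     bufa_ptr. */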
4236   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4237   if (scall == MAT_INITIAL_MATRIX){
4238     /* i-array */
4239     /*---------*/
4240     /*  post receives */
4241     for (i=0; i<nrecvs; i++){
4242       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4243       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4244       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4245     }
4246 
4247     /* pack the outgoing message */
4248     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4249     rstartsj = sstartsj + nsends +1;
4250     sstartsj[0] = 0;  rstartsj[0] = 0;
4251     len = 0; /* total length of j or a array to be sent */
4252     k = 0;
4253     for (i=0; i<nsends; i++){
4254       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4255       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4256       for (j=0; j<nrows; j++) {
4257         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4258         for (l=0; l<sbs; l++){
4259           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4260           rowlen[j*sbs+l] = ncols;
4261           len += ncols;
4262           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4263         }
4264         k++;
4265       }
4266       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4267       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4268     }
4269     /* recvs and sends of i-array are completed */
4270     i = nrecvs;
4271     while (i--) {
4272       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4273     }
4274     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4275 
4276     /* allocate buffers for sending j and a arrays */
4277     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4278     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4279 
4280     /* create i-array of B_oth */
4281     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4282     b_othi[0] = 0;
4283     len = 0; /* total length of j or a array to be received */
4284     k = 0;
4285     for (i=0; i<nrecvs; i++){
4286       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4287       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4288       for (j=0; j<nrows; j++) {
4289         b_othi[k+1] = b_othi[k] + rowlen[j];
4290         len += rowlen[j]; k++;
4291       }
4292       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4293     }
4294 
4295     /* allocate space for j and a arrays of B_oth */
4296     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4297     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4298 
4299     /* j-array */
4300     /*---------*/
4301     /*  post receives of j-array */
4302     for (i=0; i<nrecvs; i++){
4303       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4304       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4305     }
4306 
4307     /* pack the outgoing message j-array */
4308     k = 0;
4309     for (i=0; i<nsends; i++){
4310       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4311       bufJ = bufj+sstartsj[i];
4312       for (j=0; j<nrows; j++) {
4313         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4314         for (ll=0; ll<sbs; ll++){
4315           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4316           for (l=0; l<ncols; l++){
4317             *bufJ++ = cols[l];
4318           }
4319           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4320         }
4321       }
4322       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4323     }
4324 
4325     /* recvs and sends of j-array are completed */
4326     i = nrecvs;
4327     while (i--) {
4328       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4329     }
4330     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4331   } else if (scall == MAT_REUSE_MATRIX){
4332     sstartsj = *startsj;
4333     rstartsj = sstartsj + nsends +1;
4334     bufa     = *bufa_ptr;
4335     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4336     b_otha   = b_oth->a;
4337   } else {
4338     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4339   }
4340 
4341   /* a-array */
4342   /*---------*/
4343   /*  post receives of a-array */
4344   for (i=0; i<nrecvs; i++){
4345     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4346     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4347   }
4348 
4349   /* pack the outgoing message a-array */
4350   k = 0;
4351   for (i=0; i<nsends; i++){
4352     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4353     bufA = bufa+sstartsj[i];
4354     for (j=0; j<nrows; j++) {
4355       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4356       for (ll=0; ll<sbs; ll++){
4357         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4358         for (l=0; l<ncols; l++){
4359           *bufA++ = vals[l];
4360         }
4361         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4362       }
4363     }
4364     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4365   }
4366   /* recvs and sends of a-array are completed */
4367   i = nrecvs;
4368   while (i--) {
4369     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4370   }
4371   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4372   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4373 
4374   if (scall == MAT_INITIAL_MATRIX){
4375     /* put together the new matrix */
4376     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4377 
4378     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4379     /* Since these are PETSc arrays, change flags to free them as necessary. */
4380     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4381     b_oth->free_a  = PETSC_TRUE;
4382     b_oth->free_ij = PETSC_TRUE;
4383     b_oth->nonew   = 0;
4384 
4385     ierr = PetscFree(bufj);CHKERRQ(ierr);
4386     if (!startsj || !bufa_ptr){
4387       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4388       ierr = PetscFree(bufa);CHKERRQ(ierr);
4389     } else {
4390       *startsj  = sstartsj;
4391       *bufa_ptr = bufa;
4392     }
4393   }
4394   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4395   PetscFunctionReturn(0);
4396 }
4397 
4398 #undef __FUNCT__
4399 #define __FUNCT__ "MatGetCommunicationStructs"
4400 /*@C
4401   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4402 
4403   Not Collective
4404 
4405   Input Parameters:
4406 . A - The matrix in mpiaij format
4407 
4408   Output Parameter:
4409 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4410 . colmap - A map from global column index to local index into lvec
4411 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4412 
4413   Level: developer
4414 
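  Sample usage, a sketch with error checking omitted (A is an assembled MPIAIJ
  matrix and x a vector whose parallel layout matches A's columns):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
    ierr = VecScatterBegin(x,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);
    ierr = VecScatterEnd(x,lvec,INSERT_VALUES,SCATTER_FORWARD,Mvctx);
    ... lvec now holds the off-process entries of x ...
.ve
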
4415 @*/
4416 #if defined (PETSC_USE_CTABLE)
4417 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4418 #else
4419 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4420 #endif
4421 {
4422   Mat_MPIAIJ *a;
4423 
4424   PetscFunctionBegin;
4425   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4426   PetscValidPointer(lvec, 2);
4427   PetscValidPointer(colmap, 3);
4428   PetscValidPointer(multScatter, 4);
4429   a = (Mat_MPIAIJ *) A->data;
4430   if (lvec) *lvec = a->lvec;
4431   if (colmap) *colmap = a->colmap;
4432   if (multScatter) *multScatter = a->Mvctx;
4433   PetscFunctionReturn(0);
4434 }
4435 
4436 EXTERN_C_BEGIN
4437 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4438 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4439 EXTERN_C_END
4440 
4441 /*MC
4442    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4443 
4444    Options Database Keys:
4445 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4446 
4447   Level: beginner
4448 
4449 .seealso: MatCreateMPIAIJ
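   Example, a sketch in which comm, the sizes, and the preallocation counts
   are placeholders:
.vb
     ierr = MatCreate(comm,&A);
     ierr = MatSetSizes(A,m,n,M,N);
     ierr = MatSetType(A,MATMPIAIJ);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
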
4450 M*/
4451 
4452 EXTERN_C_BEGIN
4453 #undef __FUNCT__
4454 #define __FUNCT__ "MatCreate_MPIAIJ"
4455 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4456 {
4457   Mat_MPIAIJ     *b;
4458   PetscErrorCode ierr;
4459   PetscMPIInt    size;
4460 
4461   PetscFunctionBegin;
4462   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4463 
4464   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4465   B->data         = (void*)b;
4466   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4467   B->factor       = 0;
4468   B->rmap.bs      = 1;
4469   B->assembled    = PETSC_FALSE;
4470   B->mapping      = 0;
4471 
4472   B->insertmode      = NOT_SET_VALUES;
4473   b->size            = size;
4474   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4475 
4476   /* build cache for off array entries formed */
4477   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4478   b->donotstash  = PETSC_FALSE;
4479   b->colmap      = 0;
4480   b->garray      = 0;
4481   b->roworiented = PETSC_TRUE;
4482 
4483   /* stuff used for matrix vector multiply */
4484   b->lvec      = PETSC_NULL;
4485   b->Mvctx     = PETSC_NULL;
4486 
4487   /* stuff for MatGetRow() */
4488   b->rowindices   = 0;
4489   b->rowvalues    = 0;
4490   b->getrowactive = PETSC_FALSE;
4491 
4492 
4493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4494                                      "MatStoreValues_MPIAIJ",
4495                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4497                                      "MatRetrieveValues_MPIAIJ",
4498                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4499   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4500 				     "MatGetDiagonalBlock_MPIAIJ",
4501                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4503 				     "MatIsTranspose_MPIAIJ",
4504 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4505   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4506 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4507 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4509 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4510 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4512 				     "MatDiagonalScaleLocal_MPIAIJ",
4513 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4515                                      "MatConvert_MPIAIJ_MPICSRPERM",
4516                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4518                                      "MatConvert_MPIAIJ_MPICRL",
4519                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4520   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4521   PetscFunctionReturn(0);
4522 }
4523 EXTERN_C_END
4524 
4525 #undef __FUNCT__
4526 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4527 /*@C
4528      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4529          and "off-diagonal" part of the matrix in CSR format.
4530 
4531    Collective on MPI_Comm
4532 
4533    Input Parameters:
4534 +  comm - MPI communicator
4535 .  m - number of local rows (Cannot be PETSC_DECIDE)
4536 .  n - This value should be the same as the local size used in creating the
4537        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4538        it calculated if N is given). For square matrices n is almost always m.
4539 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4540 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4541 .   i - row indices for "diagonal" portion of matrix
4542 .   j - column indices, numbered locally within the diagonal block
4543 .   a - matrix values
4544 .   oi - row indices for "off-diagonal" portion of matrix
4545 .   oj - column indices, in global numbering
4546 -   oa - matrix values
4547 
4548    Output Parameter:
4549 .   mat - the matrix
4550 
4551    Level: advanced
4552 
4553    Notes:
4554        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4555 
4556        The i and j indices are 0 based
4557 
4558        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4559 
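       Example: the 2 by 2 matrix {{1,2},{3,4}} on two processes, each owning
       one row and one column; a sketch illustrating that the j indices are
       local to the diagonal block while the oj indices are global:
.vb
       rank 0:  i  = {0,1}, j  = {0}, a  = {1.0}     global entry (0,0)
                oi = {0,1}, oj = {1}, oa = {2.0}     global entry (0,1)
       rank 1:  i  = {0,1}, j  = {0}, a  = {4.0}     global entry (1,1)
                oi = {0,1}, oj = {0}, oa = {3.0}     global entry (1,0)
       both:    ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,i,j,a,oi,oj,oa,&mat);
.ve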
4560 
4561 .keywords: matrix, aij, compressed row, sparse, parallel
4562 
4563 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4564           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4565 @*/
4566 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4567 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4568 {
4569   PetscErrorCode ierr;
4570   Mat_MPIAIJ     *maij;
4571 
4572   PetscFunctionBegin;
4573   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4574   if (i[0]) {
4575     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4576   }
4577   if (oi[0]) {
4578     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4579   }
4580   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4581   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4582   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4583   maij = (Mat_MPIAIJ*) (*mat)->data;
4584   maij->donotstash     = PETSC_TRUE;
4585   (*mat)->preallocated = PETSC_TRUE;
4586 
4587   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4588   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4589   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4590 
4591   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4592   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4593 
4594   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4595   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4596   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4597   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4598 
4599   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4600   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4601   PetscFunctionReturn(0);
4602 }
4603 
4604 /*
4605     Special version for direct calls from Fortran
4606 */
4607 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4608 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4610 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4611 #endif
4612 
4613 /* Change these macros so they can be used in a void function */
4614 #undef CHKERRQ
4615 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4616 #undef SETERRQ2
4617 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4618 #undef SETERRQ
4619 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4620 
4621 EXTERN_C_BEGIN
4622 #undef __FUNCT__
4623 #define __FUNCT__ "matsetvaluesmpiaij_"
4624 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4625 {
4626   Mat            mat = *mmat;
4627   PetscInt       m = *mm, n = *mn;
4628   InsertMode     addv = *maddv;
4629   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4630   PetscScalar    value;
4631   PetscErrorCode ierr;
4632 
4633   MatPreallocated(mat);
4634   if (mat->insertmode == NOT_SET_VALUES) {
4635     mat->insertmode = addv;
4636   }
4637 #if defined(PETSC_USE_DEBUG)
4638   else if (mat->insertmode != addv) {
4639     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4640   }
4641 #endif
4642   {
4643   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4644   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4645   PetscTruth     roworiented = aij->roworiented;
4646 
4647   /* Some Variables required in the macro */
4648   Mat            A = aij->A;
4649   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4650   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4651   PetscScalar    *aa = a->a;
4652   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4653   Mat            B = aij->B;
4654   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4655   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4656   PetscScalar    *ba = b->a;
4657 
4658   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4659   PetscInt       nonew = a->nonew;
4660   PetscScalar    *ap1,*ap2;
4661 
4662   PetscFunctionBegin;
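  /* Each (row,col) pair is routed as in MatSetValues_MPIAIJ(): columns in
     [cstart,cend) go into the diagonal block A, all other columns go into
     the off-diagonal block B (via colmap once the matrix has been
     assembled), and rows not owned by this process are stashed for
     communication at assembly time. */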
4663   for (i=0; i<m; i++) {
4664     if (im[i] < 0) continue;
4665 #if defined(PETSC_USE_DEBUG)
4666     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4667 #endif
4668     if (im[i] >= rstart && im[i] < rend) {
4669       row      = im[i] - rstart;
4670       lastcol1 = -1;
4671       rp1      = aj + ai[row];
4672       ap1      = aa + ai[row];
4673       rmax1    = aimax[row];
4674       nrow1    = ailen[row];
4675       low1     = 0;
4676       high1    = nrow1;
4677       lastcol2 = -1;
4678       rp2      = bj + bi[row];
4679       ap2      = ba + bi[row];
4680       rmax2    = bimax[row];
4681       nrow2    = bilen[row];
4682       low2     = 0;
4683       high2    = nrow2;
4684 
4685       for (j=0; j<n; j++) {
4686         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4687         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4688         if (in[j] >= cstart && in[j] < cend){
4689           col = in[j] - cstart;
4690           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4691         } else if (in[j] < 0) continue;
4692 #if defined(PETSC_USE_DEBUG)
4693         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4694 #endif
4695         else {
4696           if (mat->was_assembled) {
4697             if (!aij->colmap) {
4698               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4699             }
4700 #if defined (PETSC_USE_CTABLE)
4701             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4702 	    col--;
4703 #else
4704             col = aij->colmap[in[j]] - 1;
4705 #endif
4706             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4707               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4708               col =  in[j];
4709               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4710               B = aij->B;
4711               b = (Mat_SeqAIJ*)B->data;
4712               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4713               rp2      = bj + bi[row];
4714               ap2      = ba + bi[row];
4715               rmax2    = bimax[row];
4716               nrow2    = bilen[row];
4717               low2     = 0;
4718               high2    = nrow2;
4719               bm       = aij->B->rmap.n;
4720               ba = b->a;
4721             }
4722           } else col = in[j];
4723           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4724         }
4725       }
4726     } else {
4727       if (!aij->donotstash) {
4728         if (roworiented) {
4729           if (ignorezeroentries && v[i*n] == 0.0) continue;
4730           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4731         } else {
4732           if (ignorezeroentries && v[i] == 0.0) continue;
4733           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4734         }
4735       }
4736     }
4737   }}
4738   PetscFunctionReturnVoid();
4739 }
4740 EXTERN_C_END
4741 
4742