xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8aa348c1bbf530458b5a7b3952a37e98498aa548)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
11 has an order N integer array but is fast to acess.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col --;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled, if so we must
346      also disassemble ourselfs, in order that we may reassemble. */
347   /*
348      if nonzero structure of submatrix B cannot change then we know that
349      no processor disassembled thus we can skip this stuff
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal we use separate
502      code so that MatSetValues() is not called for each diagonal allocating
503      new memory, thus calling lots of mallocs and slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added in yy until the next line, */
594     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
600     /* values actually were received in the Begin() but we need to call this nop */
601     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* )th processor needs space a largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
903       PetscFunctionReturn(0);
904     } else if (format == PETSC_VIEWER_ASCII_INFO) {
905       PetscInt   inodecount,inodelimit,*inodes;
906       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
907       if (inodes) {
908         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
909       } else {
910         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
911       }
912       PetscFunctionReturn(0);
913     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
914       PetscFunctionReturn(0);
915     }
916   } else if (isbinary) {
917     if (size == 1) {
918       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
919       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
920     } else {
921       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
922     }
923     PetscFunctionReturn(0);
924   } else if (isdraw) {
925     PetscDraw  draw;
926     PetscTruth isnull;
927     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
928     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
929   }
930 
931   if (size == 1) {
932     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934   } else {
935     /* assemble the entire matrix onto first processor. */
936     Mat         A;
937     Mat_SeqAIJ  *Aloc;
938     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
939     PetscScalar *a;
940 
941     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
942     if (!rank) {
943       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
944     } else {
945       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
946     }
947     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
948     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
949     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
950     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
951 
952     /* copy over the A part */
953     Aloc = (Mat_SeqAIJ*)aij->A->data;
954     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955     row = mat->rmap.rstart;
956     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
957     for (i=0; i<m; i++) {
958       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
959       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
960     }
961     aj = Aloc->j;
962     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
963 
964     /* copy over the B part */
965     Aloc = (Mat_SeqAIJ*)aij->B->data;
966     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
967     row  = mat->rmap.rstart;
968     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
969     ct   = cols;
970     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
971     for (i=0; i<m; i++) {
972       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
973       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
974     }
975     ierr = PetscFree(ct);CHKERRQ(ierr);
976     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     /*
979        Everyone has to call to draw the matrix since the graphics waits are
980        synchronized across all processors that share the PetscDraw object
981     */
982     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
983     if (!rank) {
984       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
985       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
986     }
987     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
988     ierr = MatDestroy(A);CHKERRQ(ierr);
989   }
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatView_MPIAIJ"
995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
996 {
997   PetscErrorCode ierr;
998   PetscTruth     iascii,isdraw,issocket,isbinary;
999 
1000   PetscFunctionBegin;
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1005   if (iascii || isdraw || isbinary || issocket) {
1006     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1007   } else {
1008     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MatRelax_MPIAIJ"
1015 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1016 {
1017   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1018   PetscErrorCode ierr;
1019   Vec            bb1;
1020 
1021   PetscFunctionBegin;
1022   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1023 
1024   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1025 
1026   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1027     if (flag & SOR_ZERO_INITIAL_GUESS) {
1028       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1029       its--;
1030     }
1031 
1032     while (its--) {
1033       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1034       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1035 
1036       /* update rhs: bb1 = bb - B*x */
1037       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1038       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1039 
1040       /* local sweep */
1041       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1042       CHKERRQ(ierr);
1043     }
1044   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1045     if (flag & SOR_ZERO_INITIAL_GUESS) {
1046       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1047       its--;
1048     }
1049     while (its--) {
1050       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1051       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052 
1053       /* update rhs: bb1 = bb - B*x */
1054       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1055       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1056 
1057       /* local sweep */
1058       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1059       CHKERRQ(ierr);
1060     }
1061   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1062     if (flag & SOR_ZERO_INITIAL_GUESS) {
1063       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1064       its--;
1065     }
1066     while (its--) {
1067       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069 
1070       /* update rhs: bb1 = bb - B*x */
1071       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1072       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1073 
1074       /* local sweep */
1075       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1076       CHKERRQ(ierr);
1077     }
1078   } else {
1079     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1080   }
1081 
1082   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatPermute_MPIAIJ"
1088 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1089 {
1090   MPI_Comm       comm,pcomm;
1091   PetscInt       first,local_size,nrows,*rows;
1092   int            ntids;
1093   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1098   /* make a collective version of 'rowp' */
1099   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1100   if (pcomm==comm) {
1101     crowp = rowp;
1102   } else {
1103     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1104     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1105     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1106     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1107   }
1108   /* collect the global row permutation and invert it */
1109   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1110   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1111   if (pcomm!=comm) {
1112     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1113   }
1114   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1115   /* get the local target indices */
1116   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1117   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1119   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1120   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1121   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1122   /* the column permutation is so much easier;
1123      make a local version of 'colp' and invert it */
1124   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1125   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1126   if (ntids==1) {
1127     lcolp = colp;
1128   } else {
1129     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1130     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1131     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1132   }
1133   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1134   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1135   if (ntids>1) {
1136     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1137     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1138   }
1139   /* now we just get the submatrix */
1140   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1141   /* clean up */
1142   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1143   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1149 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1150 {
1151   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1152   Mat            A = mat->A,B = mat->B;
1153   PetscErrorCode ierr;
1154   PetscReal      isend[5],irecv[5];
1155 
1156   PetscFunctionBegin;
1157   info->block_size     = 1.0;
1158   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1159   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1160   isend[3] = info->memory;  isend[4] = info->mallocs;
1161   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1162   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1163   isend[3] += info->memory;  isend[4] += info->mallocs;
1164   if (flag == MAT_LOCAL) {
1165     info->nz_used      = isend[0];
1166     info->nz_allocated = isend[1];
1167     info->nz_unneeded  = isend[2];
1168     info->memory       = isend[3];
1169     info->mallocs      = isend[4];
1170   } else if (flag == MAT_GLOBAL_MAX) {
1171     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1172     info->nz_used      = irecv[0];
1173     info->nz_allocated = irecv[1];
1174     info->nz_unneeded  = irecv[2];
1175     info->memory       = irecv[3];
1176     info->mallocs      = irecv[4];
1177   } else if (flag == MAT_GLOBAL_SUM) {
1178     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1179     info->nz_used      = irecv[0];
1180     info->nz_allocated = irecv[1];
1181     info->nz_unneeded  = irecv[2];
1182     info->memory       = irecv[3];
1183     info->mallocs      = irecv[4];
1184   }
1185   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1186   info->fill_ratio_needed = 0;
1187   info->factor_mallocs    = 0;
1188   info->rows_global       = (double)matin->rmap.N;
1189   info->columns_global    = (double)matin->cmap.N;
1190   info->rows_local        = (double)matin->rmap.n;
1191   info->columns_local     = (double)matin->cmap.N;
1192 
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatSetOption_MPIAIJ"
1198 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1199 {
1200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   switch (op) {
1205   case MAT_NO_NEW_NONZERO_LOCATIONS:
1206   case MAT_YES_NEW_NONZERO_LOCATIONS:
1207   case MAT_COLUMNS_UNSORTED:
1208   case MAT_COLUMNS_SORTED:
1209   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1210   case MAT_KEEP_ZEROED_ROWS:
1211   case MAT_NEW_NONZERO_LOCATION_ERR:
1212   case MAT_USE_INODES:
1213   case MAT_DO_NOT_USE_INODES:
1214   case MAT_IGNORE_ZERO_ENTRIES:
1215     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1216     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1217     break;
1218   case MAT_ROW_ORIENTED:
1219     a->roworiented = PETSC_TRUE;
1220     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1221     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1222     break;
1223   case MAT_ROWS_SORTED:
1224   case MAT_ROWS_UNSORTED:
1225   case MAT_YES_NEW_DIAGONALS:
1226     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1227     break;
1228   case MAT_COLUMN_ORIENTED:
1229     a->roworiented = PETSC_FALSE;
1230     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1231     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1232     break;
1233   case MAT_IGNORE_OFF_PROC_ENTRIES:
1234     a->donotstash = PETSC_TRUE;
1235     break;
1236   case MAT_NO_NEW_DIAGONALS:
1237     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1238   case MAT_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1240     break;
1241   case MAT_STRUCTURALLY_SYMMETRIC:
1242   case MAT_HERMITIAN:
1243   case MAT_SYMMETRY_ETERNAL:
1244     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1245     break;
1246   case MAT_NOT_SYMMETRIC:
1247   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1248   case MAT_NOT_HERMITIAN:
1249   case MAT_NOT_SYMMETRY_ETERNAL:
1250     break;
1251   default:
1252     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1253   }
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatGetRow_MPIAIJ"
1259 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1260 {
1261   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1262   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1263   PetscErrorCode ierr;
1264   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1265   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1266   PetscInt       *cmap,*idx_p;
1267 
1268   PetscFunctionBegin;
1269   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1270   mat->getrowactive = PETSC_TRUE;
1271 
1272   if (!mat->rowvalues && (idx || v)) {
1273     /*
1274         allocate enough space to hold information from the longest row.
1275     */
1276     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1277     PetscInt     max = 1,tmp;
1278     for (i=0; i<matin->rmap.n; i++) {
1279       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1280       if (max < tmp) { max = tmp; }
1281     }
1282     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1283     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1284   }
1285 
1286   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1287   lrow = row - rstart;
1288 
1289   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1290   if (!v)   {pvA = 0; pvB = 0;}
1291   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1292   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1293   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1294   nztot = nzA + nzB;
1295 
1296   cmap  = mat->garray;
1297   if (v  || idx) {
1298     if (nztot) {
1299       /* Sort by increasing column numbers, assuming A and B already sorted */
1300       PetscInt imark = -1;
1301       if (v) {
1302         *v = v_p = mat->rowvalues;
1303         for (i=0; i<nzB; i++) {
1304           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1305           else break;
1306         }
1307         imark = i;
1308         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1309         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1310       }
1311       if (idx) {
1312         *idx = idx_p = mat->rowindices;
1313         if (imark > -1) {
1314           for (i=0; i<imark; i++) {
1315             idx_p[i] = cmap[cworkB[i]];
1316           }
1317         } else {
1318           for (i=0; i<nzB; i++) {
1319             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1320             else break;
1321           }
1322           imark = i;
1323         }
1324         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1325         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1326       }
1327     } else {
1328       if (idx) *idx = 0;
1329       if (v)   *v   = 0;
1330     }
1331   }
1332   *nz = nztot;
1333   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1334   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1340 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1341 {
1342   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1343 
1344   PetscFunctionBegin;
1345   if (!aij->getrowactive) {
1346     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1347   }
1348   aij->getrowactive = PETSC_FALSE;
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatNorm_MPIAIJ"
1354 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1355 {
1356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1357   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1358   PetscErrorCode ierr;
1359   PetscInt       i,j,cstart = mat->cmap.rstart;
1360   PetscReal      sum = 0.0;
1361   PetscScalar    *v;
1362 
1363   PetscFunctionBegin;
1364   if (aij->size == 1) {
1365     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1366   } else {
1367     if (type == NORM_FROBENIUS) {
1368       v = amat->a;
1369       for (i=0; i<amat->nz; i++) {
1370 #if defined(PETSC_USE_COMPLEX)
1371         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1372 #else
1373         sum += (*v)*(*v); v++;
1374 #endif
1375       }
1376       v = bmat->a;
1377       for (i=0; i<bmat->nz; i++) {
1378 #if defined(PETSC_USE_COMPLEX)
1379         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1380 #else
1381         sum += (*v)*(*v); v++;
1382 #endif
1383       }
1384       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1385       *norm = sqrt(*norm);
1386     } else if (type == NORM_1) { /* max column norm */
1387       PetscReal *tmp,*tmp2;
1388       PetscInt    *jj,*garray = aij->garray;
1389       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1390       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1391       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1392       *norm = 0.0;
1393       v = amat->a; jj = amat->j;
1394       for (j=0; j<amat->nz; j++) {
1395         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1396       }
1397       v = bmat->a; jj = bmat->j;
1398       for (j=0; j<bmat->nz; j++) {
1399         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1400       }
1401       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1402       for (j=0; j<mat->cmap.N; j++) {
1403         if (tmp2[j] > *norm) *norm = tmp2[j];
1404       }
1405       ierr = PetscFree(tmp);CHKERRQ(ierr);
1406       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1407     } else if (type == NORM_INFINITY) { /* max row norm */
1408       PetscReal ntemp = 0.0;
1409       for (j=0; j<aij->A->rmap.n; j++) {
1410         v = amat->a + amat->i[j];
1411         sum = 0.0;
1412         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1413           sum += PetscAbsScalar(*v); v++;
1414         }
1415         v = bmat->a + bmat->i[j];
1416         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1417           sum += PetscAbsScalar(*v); v++;
1418         }
1419         if (sum > ntemp) ntemp = sum;
1420       }
1421       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1422     } else {
1423       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1424     }
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatTranspose_MPIAIJ"
1431 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1432 {
1433   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1434   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1435   PetscErrorCode ierr;
1436   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1437   Mat            B;
1438   PetscScalar    *array;
1439 
1440   PetscFunctionBegin;
1441   if (!matout && M != N) {
1442     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1443   }
1444 
1445   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1446   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1447   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1448   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1449 
1450   /* copy over the A part */
1451   Aloc = (Mat_SeqAIJ*)a->A->data;
1452   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1453   row = A->rmap.rstart;
1454   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1455   for (i=0; i<m; i++) {
1456     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1457     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1458   }
1459   aj = Aloc->j;
1460   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1461 
1462   /* copy over the B part */
1463   Aloc = (Mat_SeqAIJ*)a->B->data;
1464   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1465   row  = A->rmap.rstart;
1466   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1467   ct   = cols;
1468   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1469   for (i=0; i<m; i++) {
1470     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1471     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1472   }
1473   ierr = PetscFree(ct);CHKERRQ(ierr);
1474   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1475   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1476   if (matout) {
1477     *matout = B;
1478   } else {
1479     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1480   }
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 #undef __FUNCT__
1485 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1486 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1487 {
1488   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1489   Mat            a = aij->A,b = aij->B;
1490   PetscErrorCode ierr;
1491   PetscInt       s1,s2,s3;
1492 
1493   PetscFunctionBegin;
1494   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1495   if (rr) {
1496     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1497     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1498     /* Overlap communication with computation. */
1499     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1500   }
1501   if (ll) {
1502     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1503     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1504     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1505   }
1506   /* scale  the diagonal block */
1507   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1508 
1509   if (rr) {
1510     /* Do a scatter end and then right scale the off-diagonal block */
1511     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1512     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1513   }
1514 
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1520 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1521 {
1522   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1523   PetscErrorCode ierr;
1524 
1525   PetscFunctionBegin;
1526   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1527   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1532 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1533 {
1534   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatEqual_MPIAIJ"
1544 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1545 {
1546   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1547   Mat            a,b,c,d;
1548   PetscTruth     flg;
1549   PetscErrorCode ierr;
1550 
1551   PetscFunctionBegin;
1552   a = matA->A; b = matA->B;
1553   c = matB->A; d = matB->B;
1554 
1555   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1556   if (flg) {
1557     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1558   }
1559   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatCopy_MPIAIJ"
1565 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1566 {
1567   PetscErrorCode ierr;
1568   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1569   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1570 
1571   PetscFunctionBegin;
1572   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1573   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1574     /* because of the column compression in the off-processor part of the matrix a->B,
1575        the number of columns in a->B and b->B may be different, hence we cannot call
1576        the MatCopy() directly on the two parts. If need be, we can provide a more
1577        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1578        then copying the submatrices */
1579     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1580   } else {
1581     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1582     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1583   }
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1589 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1590 {
1591   PetscErrorCode ierr;
1592 
1593   PetscFunctionBegin;
1594   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #include "petscblaslapack.h"
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatAXPY_MPIAIJ"
1601 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1602 {
1603   PetscErrorCode ierr;
1604   PetscInt       i;
1605   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1606   PetscBLASInt   bnz,one=1;
1607   Mat_SeqAIJ     *x,*y;
1608 
1609   PetscFunctionBegin;
1610   if (str == SAME_NONZERO_PATTERN) {
1611     PetscScalar alpha = a;
1612     x = (Mat_SeqAIJ *)xx->A->data;
1613     y = (Mat_SeqAIJ *)yy->A->data;
1614     bnz = (PetscBLASInt)x->nz;
1615     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1616     x = (Mat_SeqAIJ *)xx->B->data;
1617     y = (Mat_SeqAIJ *)yy->B->data;
1618     bnz = (PetscBLASInt)x->nz;
1619     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1620   } else if (str == SUBSET_NONZERO_PATTERN) {
1621     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1622 
1623     x = (Mat_SeqAIJ *)xx->B->data;
1624     y = (Mat_SeqAIJ *)yy->B->data;
1625     if (y->xtoy && y->XtoY != xx->B) {
1626       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1627       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1628     }
1629     if (!y->xtoy) { /* get xtoy */
1630       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1631       y->XtoY = xx->B;
1632       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1633     }
1634     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1635   } else {
1636     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1642 
1643 #undef __FUNCT__
1644 #define __FUNCT__ "MatConjugate_MPIAIJ"
1645 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1646 {
1647 #if defined(PETSC_USE_COMPLEX)
1648   PetscErrorCode ierr;
1649   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1653   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1654 #else
1655   PetscFunctionBegin;
1656 #endif
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatRealPart_MPIAIJ"
1662 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1663 {
1664   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1665   PetscErrorCode ierr;
1666 
1667   PetscFunctionBegin;
1668   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1669   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1675 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1676 {
1677   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1678   PetscErrorCode ierr;
1679 
1680   PetscFunctionBegin;
1681   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1682   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #ifdef PETSC_HAVE_PBGL
1687 
1688 #include <boost/parallel/mpi/bsp_process_group.hpp>
1689 #include <boost/graph/distributed/ilu_default_graph.hpp>
1690 #include <boost/graph/distributed/ilu_0_block.hpp>
1691 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1692 #include <boost/graph/distributed/petsc/interface.hpp>
1693 #include <boost/multi_array.hpp>
1694 #include <boost/parallel/distributed_property_map.hpp>
1695 
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1698 /*
1699   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1700 */
1701 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1702 {
1703   namespace petsc = boost::distributed::petsc;
1704 
1705   namespace graph_dist = boost::graph::distributed;
1706   using boost::graph::distributed::ilu_default::process_group_type;
1707   using boost::graph::ilu_permuted;
1708 
1709   PetscTruth      row_identity, col_identity;
1710   PetscContainer  c;
1711   PetscInt        m, n, M, N;
1712   PetscErrorCode  ierr;
1713 
1714   PetscFunctionBegin;
1715   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1716   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1717   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1718   if (!row_identity || !col_identity) {
1719     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1720   }
1721 
1722   process_group_type pg;
1723   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1724   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1725   lgraph_type&   level_graph = *lgraph_p;
1726   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1727 
1728   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1729   ilu_permuted(level_graph);
1730 
1731   /* put together the new matrix */
1732   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1733   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1734   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1735   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1736   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1737   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1738   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1739   (*fact)->factor = FACTOR_LU;
1740 
1741   ierr = PetscContainerCreate(A->comm, &c);
1742   ierr = PetscContainerSetPointer(c, lgraph_p);
1743   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1749 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1750 {
1751   PetscFunctionBegin;
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatSolve_MPIAIJ"
1757 /*
1758   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1759 */
1760 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1761 {
1762   namespace graph_dist = boost::graph::distributed;
1763 
1764   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1765   lgraph_type*   lgraph_p;
1766   PetscContainer c;
1767   PetscErrorCode ierr;
1768 
1769   PetscFunctionBegin;
1770   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1771   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1772   ierr = VecCopy(b, x); CHKERRQ(ierr);
1773 
1774   PetscScalar* array_x;
1775   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1776   PetscInt sx;
1777   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1778 
1779   PetscScalar* array_b;
1780   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1781   PetscInt sb;
1782   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1783 
1784   lgraph_type&   level_graph = *lgraph_p;
1785   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1786 
1787   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1788   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1789                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1790 
1791   typedef boost::iterator_property_map<array_ref_type::iterator,
1792                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1793   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1794                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1795 
1796   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1797 
1798   PetscFunctionReturn(0);
1799 }
1800 #endif
1801 
1802 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1803   PetscInt       nzlocal,nsends,nrecvs;
1804   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
1805   PetscScalar    *sbuf_a,**rbuf_a;
1806   PetscErrorCode (*MatDestroy)(Mat);
1807 } Mat_Redundant;
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1811 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1812 {
1813   PetscErrorCode       ierr;
1814   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1815   PetscInt             i;
1816 
1817   PetscFunctionBegin;
1818   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1819   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1820   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1821   for (i=0; i<redund->nrecvs; i++){
1822     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1823     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1824   }
1825   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1826   ierr = PetscFree(redund);CHKERRQ(ierr);
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatDestroy_MatRedundant"
1832 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1833 {
1834   PetscErrorCode  ierr;
1835   PetscContainer  container;
1836   Mat_Redundant   *redund=PETSC_NULL;
1837 
1838   PetscFunctionBegin;
1839   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1840   if (container) {
1841     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1842   } else {
1843     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1844   }
1845   A->ops->destroy = redund->MatDestroy;
1846   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1847   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1848   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1854 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1855 {
1856   PetscMPIInt    rank,size;
1857   MPI_Comm       comm=mat->comm;
1858   PetscErrorCode ierr;
1859   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1860   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1861   PetscInt       *rowrange=mat->rmap.range;
1862   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1863   Mat            A=aij->A,B=aij->B,C=*matredundant;
1864   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1865   PetscScalar    *sbuf_a;
1866   PetscInt       nzlocal=a->nz+b->nz;
1867   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1868   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1869   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1870   PetscScalar    *vals,*aworkA,*aworkB;
1871   PetscMPIInt    tag1,tag2,tag3,imdex;
1872   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1873                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1874   MPI_Status     recv_status,*send_status;
1875   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1876   PetscInt       **rbuf_j=PETSC_NULL;
1877   PetscScalar    **rbuf_a=PETSC_NULL;
1878   Mat_Redundant  *redund=PETSC_NULL;
1879   PetscContainer container;
1880 
1881   PetscFunctionBegin;
1882   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1883   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1884 
1885   if (reuse == MAT_REUSE_MATRIX) {
1886     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1887     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1888     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1889     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1890     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1891     if (container) {
1892       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1893     } else {
1894       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1895     }
1896     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1897 
1898     nsends    = redund->nsends;
1899     nrecvs    = redund->nrecvs;
1900     send_rank = redund->send_rank; recv_rank = send_rank + size;
1901     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1902     sbuf_j    = redund->sbuf_j;
1903     sbuf_a    = redund->sbuf_a;
1904     rbuf_j    = redund->rbuf_j;
1905     rbuf_a    = redund->rbuf_a;
1906   }
1907 
1908   if (reuse == MAT_INITIAL_MATRIX){
1909     PetscMPIInt  subrank,subsize;
1910     PetscInt     nleftover,np_subcomm;
1911     /* get the destination processors' id send_rank, nsends and nrecvs */
1912     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1913     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1914     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1915     recv_rank = send_rank + size;
1916     np_subcomm = size/nsubcomm;
1917     nleftover  = size - nsubcomm*np_subcomm;
1918     nsends = 0; nrecvs = 0;
1919     for (i=0; i<size; i++){ /* i=rank*/
1920       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1921         send_rank[nsends] = i; nsends++;
1922         recv_rank[nrecvs++] = i;
1923       }
1924     }
1925     if (rank >= size - nleftover){/* this proc is a leftover processor */
1926       i = size-nleftover-1;
1927       j = 0;
1928       while (j < nsubcomm - nleftover){
1929         send_rank[nsends++] = i;
1930         i--; j++;
1931       }
1932     }
1933 
1934     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1935       for (i=0; i<nleftover; i++){
1936         recv_rank[nrecvs++] = size-nleftover+i;
1937       }
1938     }
1939 
1940     /* allocate sbuf_j, sbuf_a */
1941     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1942     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1943     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1944   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1945 
1946   /* copy mat's local entries into the buffers */
1947   if (reuse == MAT_INITIAL_MATRIX){
1948     rownz_max = 0;
1949     rptr = sbuf_j;
1950     cols = sbuf_j + rend-rstart + 1;
1951     vals = sbuf_a;
1952     rptr[0] = 0;
1953     for (i=0; i<rend-rstart; i++){
1954       row = i + rstart;
1955       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1956       ncols  = nzA + nzB;
1957       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1958       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1959       /* load the column indices for this row into cols */
1960       lwrite = 0;
1961       for (l=0; l<nzB; l++) {
1962         if ((ctmp = bmap[cworkB[l]]) < cstart){
1963           vals[lwrite]   = aworkB[l];
1964           cols[lwrite++] = ctmp;
1965         }
1966       }
1967       for (l=0; l<nzA; l++){
1968         vals[lwrite]   = aworkA[l];
1969         cols[lwrite++] = cstart + cworkA[l];
1970       }
1971       for (l=0; l<nzB; l++) {
1972         if ((ctmp = bmap[cworkB[l]]) >= cend){
1973           vals[lwrite]   = aworkB[l];
1974           cols[lwrite++] = ctmp;
1975         }
1976       }
1977       vals += ncols;
1978       cols += ncols;
1979       rptr[i+1] = rptr[i] + ncols;
1980       if (rownz_max < ncols) rownz_max = ncols;
1981     }
1982     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
1983   } else { /* only copy matrix values into sbuf_a */
1984     rptr = sbuf_j;
1985     vals = sbuf_a;
1986     rptr[0] = 0;
1987     for (i=0; i<rend-rstart; i++){
1988       row = i + rstart;
1989       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1990       ncols  = nzA + nzB;
1991       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1992       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1993       lwrite = 0;
1994       for (l=0; l<nzB; l++) {
1995         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1996       }
1997       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1998       for (l=0; l<nzB; l++) {
1999         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2000       }
2001       vals += ncols;
2002       rptr[i+1] = rptr[i] + ncols;
2003     }
2004   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2005 
2006   /* send nzlocal to others, and recv other's nzlocal */
2007   /*--------------------------------------------------*/
2008   if (reuse == MAT_INITIAL_MATRIX){
2009     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2010     s_waits2 = s_waits3 + nsends;
2011     s_waits1 = s_waits2 + nsends;
2012     r_waits1 = s_waits1 + nsends;
2013     r_waits2 = r_waits1 + nrecvs;
2014     r_waits3 = r_waits2 + nrecvs;
2015   } else {
2016     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2017     r_waits3 = s_waits3 + nsends;
2018   }
2019 
2020   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2021   if (reuse == MAT_INITIAL_MATRIX){
2022     /* get new tags to keep the communication clean */
2023     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2024     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2025     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2026     rbuf_nz = sbuf_nz + nsends;
2027 
2028     /* post receives of other's nzlocal */
2029     for (i=0; i<nrecvs; i++){
2030       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2031     }
2032     /* send nzlocal to others */
2033     for (i=0; i<nsends; i++){
2034       sbuf_nz[i] = nzlocal;
2035       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2036     }
2037     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2038     count = nrecvs;
2039     while (count) {
2040       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2041       recv_rank[imdex] = recv_status.MPI_SOURCE;
2042       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2043       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2044 
2045       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2046       rbuf_nz[imdex] += i + 2;
2047       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2048       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2049       count--;
2050     }
2051     /* wait on sends of nzlocal */
2052     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2053     /* send mat->i,j to others, and recv from other's */
2054     /*------------------------------------------------*/
2055     for (i=0; i<nsends; i++){
2056       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2057       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2058     }
2059     /* wait on receives of mat->i,j */
2060     /*------------------------------*/
2061     count = nrecvs;
2062     while (count) {
2063       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2064       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2065       count--;
2066     }
2067     /* wait on sends of mat->i,j */
2068     /*---------------------------*/
2069     if (nsends) {
2070       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2071     }
2072   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2073 
2074   /* post receives, send and receive mat->a */
2075   /*----------------------------------------*/
2076   for (imdex=0; imdex<nrecvs; imdex++) {
2077     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2078   }
2079   for (i=0; i<nsends; i++){
2080     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2081   }
2082   count = nrecvs;
2083   while (count) {
2084     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2085     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2086     count--;
2087   }
2088   if (nsends) {
2089     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2090   }
2091 
2092   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2093 
2094   /* create redundant matrix */
2095   /*-------------------------*/
2096   if (reuse == MAT_INITIAL_MATRIX){
2097     /* compute rownz_max for preallocation */
2098     for (imdex=0; imdex<nrecvs; imdex++){
2099       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2100       rptr = rbuf_j[imdex];
2101       for (i=0; i<j; i++){
2102         ncols = rptr[i+1] - rptr[i];
2103         if (rownz_max < ncols) rownz_max = ncols;
2104       }
2105     }
2106 
2107     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2108     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2109     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2110     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2111     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2112   } else {
2113     C = *matredundant;
2114   }
2115 
2116   /* insert local matrix entries */
2117   rptr = sbuf_j;
2118   cols = sbuf_j + rend-rstart + 1;
2119   vals = sbuf_a;
2120   for (i=0; i<rend-rstart; i++){
2121     row   = i + rstart;
2122     ncols = rptr[i+1] - rptr[i];
2123     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2124     vals += ncols;
2125     cols += ncols;
2126   }
2127   /* insert received matrix entries */
2128   for (imdex=0; imdex<nrecvs; imdex++){
2129     rstart = rowrange[recv_rank[imdex]];
2130     rend   = rowrange[recv_rank[imdex]+1];
2131     rptr = rbuf_j[imdex];
2132     cols = rbuf_j[imdex] + rend-rstart + 1;
2133     vals = rbuf_a[imdex];
2134     for (i=0; i<rend-rstart; i++){
2135       row   = i + rstart;
2136       ncols = rptr[i+1] - rptr[i];
2137       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2138       vals += ncols;
2139       cols += ncols;
2140     }
2141   }
2142   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2143   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2144   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2145   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2146   if (reuse == MAT_INITIAL_MATRIX){
2147     PetscContainer container;
2148     *matredundant = C;
2149     /* create a supporting struct and attach it to C for reuse */
2150     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
2151     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2152     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2153     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2154     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2155 
2156     redund->nzlocal = nzlocal;
2157     redund->nsends  = nsends;
2158     redund->nrecvs  = nrecvs;
2159     redund->send_rank = send_rank;
2160     redund->sbuf_nz = sbuf_nz;
2161     redund->sbuf_j  = sbuf_j;
2162     redund->sbuf_a  = sbuf_a;
2163     redund->rbuf_j  = rbuf_j;
2164     redund->rbuf_a  = rbuf_a;
2165 
2166     redund->MatDestroy = C->ops->destroy;
2167     C->ops->destroy    = MatDestroy_MatRedundant;
2168   }
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 /* -------------------------------------------------------------------*/
2173 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2174        MatGetRow_MPIAIJ,
2175        MatRestoreRow_MPIAIJ,
2176        MatMult_MPIAIJ,
2177 /* 4*/ MatMultAdd_MPIAIJ,
2178        MatMultTranspose_MPIAIJ,
2179        MatMultTransposeAdd_MPIAIJ,
2180 #ifdef PETSC_HAVE_PBGL
2181        MatSolve_MPIAIJ,
2182 #else
2183        0,
2184 #endif
2185        0,
2186        0,
2187 /*10*/ 0,
2188        0,
2189        0,
2190        MatRelax_MPIAIJ,
2191        MatTranspose_MPIAIJ,
2192 /*15*/ MatGetInfo_MPIAIJ,
2193        MatEqual_MPIAIJ,
2194        MatGetDiagonal_MPIAIJ,
2195        MatDiagonalScale_MPIAIJ,
2196        MatNorm_MPIAIJ,
2197 /*20*/ MatAssemblyBegin_MPIAIJ,
2198        MatAssemblyEnd_MPIAIJ,
2199        0,
2200        MatSetOption_MPIAIJ,
2201        MatZeroEntries_MPIAIJ,
2202 /*25*/ MatZeroRows_MPIAIJ,
2203        0,
2204 #ifdef PETSC_HAVE_PBGL
2205        MatLUFactorNumeric_MPIAIJ,
2206 #else
2207        0,
2208 #endif
2209        0,
2210        0,
2211 /*30*/ MatSetUpPreallocation_MPIAIJ,
2212 #ifdef PETSC_HAVE_PBGL
2213        MatILUFactorSymbolic_MPIAIJ,
2214 #else
2215        0,
2216 #endif
2217        0,
2218        0,
2219        0,
2220 /*35*/ MatDuplicate_MPIAIJ,
2221        0,
2222        0,
2223        0,
2224        0,
2225 /*40*/ MatAXPY_MPIAIJ,
2226        MatGetSubMatrices_MPIAIJ,
2227        MatIncreaseOverlap_MPIAIJ,
2228        MatGetValues_MPIAIJ,
2229        MatCopy_MPIAIJ,
2230 /*45*/ 0,
2231        MatScale_MPIAIJ,
2232        0,
2233        0,
2234        0,
2235 /*50*/ MatSetBlockSize_MPIAIJ,
2236        0,
2237        0,
2238        0,
2239        0,
2240 /*55*/ MatFDColoringCreate_MPIAIJ,
2241        0,
2242        MatSetUnfactored_MPIAIJ,
2243        MatPermute_MPIAIJ,
2244        0,
2245 /*60*/ MatGetSubMatrix_MPIAIJ,
2246        MatDestroy_MPIAIJ,
2247        MatView_MPIAIJ,
2248        0,
2249        0,
2250 /*65*/ 0,
2251        0,
2252        0,
2253        0,
2254        0,
2255 /*70*/ 0,
2256        0,
2257        MatSetColoring_MPIAIJ,
2258 #if defined(PETSC_HAVE_ADIC)
2259        MatSetValuesAdic_MPIAIJ,
2260 #else
2261        0,
2262 #endif
2263        MatSetValuesAdifor_MPIAIJ,
2264 /*75*/ 0,
2265        0,
2266        0,
2267        0,
2268        0,
2269 /*80*/ 0,
2270        0,
2271        0,
2272        0,
2273 /*84*/ MatLoad_MPIAIJ,
2274        0,
2275        0,
2276        0,
2277        0,
2278        0,
2279 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2280        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2281        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2282        MatPtAP_Basic,
2283        MatPtAPSymbolic_MPIAIJ,
2284 /*95*/ MatPtAPNumeric_MPIAIJ,
2285        0,
2286        0,
2287        0,
2288        0,
2289 /*100*/0,
2290        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2291        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2292        MatConjugate_MPIAIJ,
2293        0,
2294 /*105*/MatSetValuesRow_MPIAIJ,
2295        MatRealPart_MPIAIJ,
2296        MatImaginaryPart_MPIAIJ,
2297        0,
2298        0,
2299 /*110*/0,
2300        MatGetRedundantMatrix_MPIAIJ};
2301 
2302 /* ----------------------------------------------------------------------------------------*/
2303 
2304 EXTERN_C_BEGIN
2305 #undef __FUNCT__
2306 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2307 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2308 {
2309   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2314   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2315   PetscFunctionReturn(0);
2316 }
2317 EXTERN_C_END
2318 
2319 EXTERN_C_BEGIN
2320 #undef __FUNCT__
2321 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2322 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2323 {
2324   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2325   PetscErrorCode ierr;
2326 
2327   PetscFunctionBegin;
2328   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2329   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2330   PetscFunctionReturn(0);
2331 }
2332 EXTERN_C_END
2333 
2334 #include "petscpc.h"
2335 EXTERN_C_BEGIN
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2338 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2339 {
2340   Mat_MPIAIJ     *b;
2341   PetscErrorCode ierr;
2342   PetscInt       i;
2343 
2344   PetscFunctionBegin;
2345   B->preallocated = PETSC_TRUE;
2346   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2347   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2348   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2349   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2350 
2351   B->rmap.bs = B->cmap.bs = 1;
2352   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2353   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2354   if (d_nnz) {
2355     for (i=0; i<B->rmap.n; i++) {
2356       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2357     }
2358   }
2359   if (o_nnz) {
2360     for (i=0; i<B->rmap.n; i++) {
2361       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2362     }
2363   }
2364   b = (Mat_MPIAIJ*)B->data;
2365 
2366   /* Explicitly create 2 MATSEQAIJ matrices. */
2367   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2368   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2369   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2370   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2371   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2372   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2373   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2374   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2375 
2376   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2377   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2378 
2379   PetscFunctionReturn(0);
2380 }
2381 EXTERN_C_END
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2385 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2386 {
2387   Mat            mat;
2388   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   *newmat       = 0;
2393   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2394   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2395   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2396   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2397   a    = (Mat_MPIAIJ*)mat->data;
2398 
2399   mat->factor       = matin->factor;
2400   mat->rmap.bs      = matin->rmap.bs;
2401   mat->assembled    = PETSC_TRUE;
2402   mat->insertmode   = NOT_SET_VALUES;
2403   mat->preallocated = PETSC_TRUE;
2404 
2405   a->size           = oldmat->size;
2406   a->rank           = oldmat->rank;
2407   a->donotstash     = oldmat->donotstash;
2408   a->roworiented    = oldmat->roworiented;
2409   a->rowindices     = 0;
2410   a->rowvalues      = 0;
2411   a->getrowactive   = PETSC_FALSE;
2412 
2413   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2414   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2415 
2416   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2417   if (oldmat->colmap) {
2418 #if defined (PETSC_USE_CTABLE)
2419     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2420 #else
2421     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2422     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2423     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2424 #endif
2425   } else a->colmap = 0;
2426   if (oldmat->garray) {
2427     PetscInt len;
2428     len  = oldmat->B->cmap.n;
2429     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2430     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2431     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2432   } else a->garray = 0;
2433 
2434   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2435   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2436   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2437   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2438   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2439   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2440   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2441   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2442   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2443   *newmat = mat;
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 #include "petscsys.h"
2448 
2449 #undef __FUNCT__
2450 #define __FUNCT__ "MatLoad_MPIAIJ"
2451 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2452 {
2453   Mat            A;
2454   PetscScalar    *vals,*svals;
2455   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2456   MPI_Status     status;
2457   PetscErrorCode ierr;
2458   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2459   PetscInt       i,nz,j,rstart,rend,mmax;
2460   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2461   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2462   PetscInt       cend,cstart,n,*rowners;
2463   int            fd;
2464 
2465   PetscFunctionBegin;
2466   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2467   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2468   if (!rank) {
2469     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2470     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2471     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2472   }
2473 
2474   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2475   M = header[1]; N = header[2];
2476   /* determine ownership of all rows */
2477   m    = M/size + ((M % size) > rank);
2478   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2479   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2480 
2481   /* First process needs enough room for process with most rows */
2482   if (!rank) {
2483     mmax       = rowners[1];
2484     for (i=2; i<size; i++) {
2485       mmax = PetscMax(mmax,rowners[i]);
2486     }
2487   } else mmax = m;
2488 
2489   rowners[0] = 0;
2490   for (i=2; i<=size; i++) {
2491     rowners[i] += rowners[i-1];
2492   }
2493   rstart = rowners[rank];
2494   rend   = rowners[rank+1];
2495 
2496   /* distribute row lengths to all processors */
2497   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2498   if (!rank) {
2499     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2500     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2501     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2502     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2503     for (j=0; j<m; j++) {
2504       procsnz[0] += ourlens[j];
2505     }
2506     for (i=1; i<size; i++) {
2507       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2508       /* calculate the number of nonzeros on each processor */
2509       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2510         procsnz[i] += rowlengths[j];
2511       }
2512       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2513     }
2514     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2515   } else {
2516     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2517   }
2518 
2519   if (!rank) {
2520     /* determine max buffer needed and allocate it */
2521     maxnz = 0;
2522     for (i=0; i<size; i++) {
2523       maxnz = PetscMax(maxnz,procsnz[i]);
2524     }
2525     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2526 
2527     /* read in my part of the matrix column indices  */
2528     nz   = procsnz[0];
2529     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2530     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2531 
2532     /* read in every one elses and ship off */
2533     for (i=1; i<size; i++) {
2534       nz   = procsnz[i];
2535       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2536       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2537     }
2538     ierr = PetscFree(cols);CHKERRQ(ierr);
2539   } else {
2540     /* determine buffer space needed for message */
2541     nz = 0;
2542     for (i=0; i<m; i++) {
2543       nz += ourlens[i];
2544     }
2545     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2546 
2547     /* receive message of column indices*/
2548     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2549     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2550     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2551   }
2552 
2553   /* determine column ownership if matrix is not square */
2554   if (N != M) {
2555     n      = N/size + ((N % size) > rank);
2556     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2557     cstart = cend - n;
2558   } else {
2559     cstart = rstart;
2560     cend   = rend;
2561     n      = cend - cstart;
2562   }
2563 
2564   /* loop over local rows, determining number of off diagonal entries */
2565   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2566   jj = 0;
2567   for (i=0; i<m; i++) {
2568     for (j=0; j<ourlens[i]; j++) {
2569       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2570       jj++;
2571     }
2572   }
2573 
2574   /* create our matrix */
2575   for (i=0; i<m; i++) {
2576     ourlens[i] -= offlens[i];
2577   }
2578   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2579   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2580   ierr = MatSetType(A,type);CHKERRQ(ierr);
2581   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2582 
2583   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2584   for (i=0; i<m; i++) {
2585     ourlens[i] += offlens[i];
2586   }
2587 
2588   if (!rank) {
2589     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2590 
2591     /* read in my part of the matrix numerical values  */
2592     nz   = procsnz[0];
2593     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2594 
2595     /* insert into matrix */
2596     jj      = rstart;
2597     smycols = mycols;
2598     svals   = vals;
2599     for (i=0; i<m; i++) {
2600       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2601       smycols += ourlens[i];
2602       svals   += ourlens[i];
2603       jj++;
2604     }
2605 
2606     /* read in other processors and ship out */
2607     for (i=1; i<size; i++) {
2608       nz   = procsnz[i];
2609       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2610       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2611     }
2612     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2613   } else {
2614     /* receive numeric values */
2615     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2616 
2617     /* receive message of values*/
2618     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2619     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2620     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2621 
2622     /* insert into matrix */
2623     jj      = rstart;
2624     smycols = mycols;
2625     svals   = vals;
2626     for (i=0; i<m; i++) {
2627       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2628       smycols += ourlens[i];
2629       svals   += ourlens[i];
2630       jj++;
2631     }
2632   }
2633   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2634   ierr = PetscFree(vals);CHKERRQ(ierr);
2635   ierr = PetscFree(mycols);CHKERRQ(ierr);
2636   ierr = PetscFree(rowners);CHKERRQ(ierr);
2637 
2638   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2639   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2640   *newmat = A;
2641   PetscFunctionReturn(0);
2642 }
2643 
2644 #undef __FUNCT__
2645 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2646 /*
2647     Not great since it makes two copies of the submatrix, first an SeqAIJ
2648   in local and then by concatenating the local matrices the end result.
2649   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2650 */
2651 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2652 {
2653   PetscErrorCode ierr;
2654   PetscMPIInt    rank,size;
2655   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2656   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2657   Mat            *local,M,Mreuse;
2658   PetscScalar    *vwork,*aa;
2659   MPI_Comm       comm = mat->comm;
2660   Mat_SeqAIJ     *aij;
2661 
2662 
2663   PetscFunctionBegin;
2664   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2665   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2666 
2667   if (call ==  MAT_REUSE_MATRIX) {
2668     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2669     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2670     local = &Mreuse;
2671     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2672   } else {
2673     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2674     Mreuse = *local;
2675     ierr   = PetscFree(local);CHKERRQ(ierr);
2676   }
2677 
2678   /*
2679       m - number of local rows
2680       n - number of columns (same on all processors)
2681       rstart - first row in new global matrix generated
2682   */
2683   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2684   if (call == MAT_INITIAL_MATRIX) {
2685     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2686     ii  = aij->i;
2687     jj  = aij->j;
2688 
2689     /*
2690         Determine the number of non-zeros in the diagonal and off-diagonal
2691         portions of the matrix in order to do correct preallocation
2692     */
2693 
2694     /* first get start and end of "diagonal" columns */
2695     if (csize == PETSC_DECIDE) {
2696       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2697       if (mglobal == n) { /* square matrix */
2698 	nlocal = m;
2699       } else {
2700         nlocal = n/size + ((n % size) > rank);
2701       }
2702     } else {
2703       nlocal = csize;
2704     }
2705     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2706     rstart = rend - nlocal;
2707     if (rank == size - 1 && rend != n) {
2708       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2709     }
2710 
2711     /* next, compute all the lengths */
2712     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2713     olens = dlens + m;
2714     for (i=0; i<m; i++) {
2715       jend = ii[i+1] - ii[i];
2716       olen = 0;
2717       dlen = 0;
2718       for (j=0; j<jend; j++) {
2719         if (*jj < rstart || *jj >= rend) olen++;
2720         else dlen++;
2721         jj++;
2722       }
2723       olens[i] = olen;
2724       dlens[i] = dlen;
2725     }
2726     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2727     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2728     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2729     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2730     ierr = PetscFree(dlens);CHKERRQ(ierr);
2731   } else {
2732     PetscInt ml,nl;
2733 
2734     M = *newmat;
2735     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2736     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2737     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2738     /*
2739          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2740        rather than the slower MatSetValues().
2741     */
2742     M->was_assembled = PETSC_TRUE;
2743     M->assembled     = PETSC_FALSE;
2744   }
2745   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2746   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2747   ii  = aij->i;
2748   jj  = aij->j;
2749   aa  = aij->a;
2750   for (i=0; i<m; i++) {
2751     row   = rstart + i;
2752     nz    = ii[i+1] - ii[i];
2753     cwork = jj;     jj += nz;
2754     vwork = aa;     aa += nz;
2755     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2756   }
2757 
2758   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2760   *newmat = M;
2761 
2762   /* save submatrix used in processor for next request */
2763   if (call ==  MAT_INITIAL_MATRIX) {
2764     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2765     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2766   }
2767 
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 EXTERN_C_BEGIN
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2774 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2775 {
2776   PetscInt       m,cstart, cend,j,nnz,i,d;
2777   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2778   const PetscInt *JJ;
2779   PetscScalar    *values;
2780   PetscErrorCode ierr;
2781 
2782   PetscFunctionBegin;
2783   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2784 
2785   B->rmap.bs = B->cmap.bs = 1;
2786   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2787   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2788   m      = B->rmap.n;
2789   cstart = B->cmap.rstart;
2790   cend   = B->cmap.rend;
2791   rstart = B->rmap.rstart;
2792 
2793   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2794   o_nnz = d_nnz + m;
2795 
2796   for (i=0; i<m; i++) {
2797     nnz     = Ii[i+1]- Ii[i];
2798     JJ      = J + Ii[i];
2799     nnz_max = PetscMax(nnz_max,nnz);
2800     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2801     for (j=0; j<nnz; j++) {
2802       if (*JJ >= cstart) break;
2803       JJ++;
2804     }
2805     d = 0;
2806     for (; j<nnz; j++) {
2807       if (*JJ++ >= cend) break;
2808       d++;
2809     }
2810     d_nnz[i] = d;
2811     o_nnz[i] = nnz - d;
2812   }
2813   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2814   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2815 
2816   if (v) values = (PetscScalar*)v;
2817   else {
2818     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2819     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2820   }
2821 
2822   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2823   for (i=0; i<m; i++) {
2824     ii   = i + rstart;
2825     nnz  = Ii[i+1]- Ii[i];
2826     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2827   }
2828   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2829   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2830   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2831 
2832   if (!v) {
2833     ierr = PetscFree(values);CHKERRQ(ierr);
2834   }
2835   PetscFunctionReturn(0);
2836 }
2837 EXTERN_C_END
2838 
2839 #undef __FUNCT__
2840 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2841 /*@
2842    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2843    (the default parallel PETSc format).
2844 
2845    Collective on MPI_Comm
2846 
2847    Input Parameters:
2848 +  B - the matrix
2849 .  i - the indices into j for the start of each local row (starts with zero)
2850 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2851 -  v - optional values in the matrix
2852 
2853    Level: developer
2854 
2855    Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal
2856      storage format. Thus changing the values in a[] after this call will not effect the matrix values.
2857 
2858 .keywords: matrix, aij, compressed row, sparse, parallel
2859 
2860 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2861           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2862 @*/
2863 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2864 {
2865   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2866 
2867   PetscFunctionBegin;
2868   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2869   if (f) {
2870     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2871   }
2872   PetscFunctionReturn(0);
2873 }
2874 
2875 #undef __FUNCT__
2876 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2877 /*@C
2878    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2879    (the default parallel PETSc format).  For good matrix assembly performance
2880    the user should preallocate the matrix storage by setting the parameters
2881    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2882    performance can be increased by more than a factor of 50.
2883 
2884    Collective on MPI_Comm
2885 
2886    Input Parameters:
2887 +  A - the matrix
2888 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2889            (same value is used for all local rows)
2890 .  d_nnz - array containing the number of nonzeros in the various rows of the
2891            DIAGONAL portion of the local submatrix (possibly different for each row)
2892            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2893            The size of this array is equal to the number of local rows, i.e 'm'.
2894            You must leave room for the diagonal entry even if it is zero.
2895 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2896            submatrix (same value is used for all local rows).
2897 -  o_nnz - array containing the number of nonzeros in the various rows of the
2898            OFF-DIAGONAL portion of the local submatrix (possibly different for
2899            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2900            structure. The size of this array is equal to the number
2901            of local rows, i.e 'm'.
2902 
2903    If the *_nnz parameter is given then the *_nz parameter is ignored
2904 
2905    The AIJ format (also called the Yale sparse matrix format or
2906    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2907    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2908 
2909    The parallel matrix is partitioned such that the first m0 rows belong to
2910    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2911    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2912 
2913    The DIAGONAL portion of the local submatrix of a processor can be defined
2914    as the submatrix which is obtained by extraction the part corresponding
2915    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2916    first row that belongs to the processor, and r2 is the last row belonging
2917    to the this processor. This is a square mxm matrix. The remaining portion
2918    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2919 
2920    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2921 
2922    Example usage:
2923 
2924    Consider the following 8x8 matrix with 34 non-zero values, that is
2925    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2926    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2927    as follows:
2928 
2929 .vb
2930             1  2  0  |  0  3  0  |  0  4
2931     Proc0   0  5  6  |  7  0  0  |  8  0
2932             9  0 10  | 11  0  0  | 12  0
2933     -------------------------------------
2934            13  0 14  | 15 16 17  |  0  0
2935     Proc1   0 18  0  | 19 20 21  |  0  0
2936             0  0  0  | 22 23  0  | 24  0
2937     -------------------------------------
2938     Proc2  25 26 27  |  0  0 28  | 29  0
2939            30  0  0  | 31 32 33  |  0 34
2940 .ve
2941 
2942    This can be represented as a collection of submatrices as:
2943 
2944 .vb
2945       A B C
2946       D E F
2947       G H I
2948 .ve
2949 
2950    Where the submatrices A,B,C are owned by proc0, D,E,F are
2951    owned by proc1, G,H,I are owned by proc2.
2952 
2953    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2954    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2955    The 'M','N' parameters are 8,8, and have the same values on all procs.
2956 
2957    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2958    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2959    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2960    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2961    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2962    matrix, ans [DF] as another SeqAIJ matrix.
2963 
2964    When d_nz, o_nz parameters are specified, d_nz storage elements are
2965    allocated for every row of the local diagonal submatrix, and o_nz
2966    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2967    One way to choose d_nz and o_nz is to use the max nonzerors per local
2968    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2969    In this case, the values of d_nz,o_nz are:
2970 .vb
2971      proc0 : dnz = 2, o_nz = 2
2972      proc1 : dnz = 3, o_nz = 2
2973      proc2 : dnz = 1, o_nz = 4
2974 .ve
2975    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2976    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2977    for proc3. i.e we are using 12+15+10=37 storage locations to store
2978    34 values.
2979 
2980    When d_nnz, o_nnz parameters are specified, the storage is specified
2981    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2982    In the above case the values for d_nnz,o_nnz are:
2983 .vb
2984      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2985      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2986      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2987 .ve
2988    Here the space allocated is sum of all the above values i.e 34, and
2989    hence pre-allocation is perfect.
2990 
2991    Level: intermediate
2992 
2993 .keywords: matrix, aij, compressed row, sparse, parallel
2994 
2995 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2996           MPIAIJ
2997 @*/
2998 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2999 {
3000   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3001 
3002   PetscFunctionBegin;
3003   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3004   if (f) {
3005     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3006   }
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 #undef __FUNCT__
3011 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3012 /*@C
3013      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3014          CSR format the local rows.
3015 
3016    Collective on MPI_Comm
3017 
3018    Input Parameters:
3019 +  comm - MPI communicator
3020 .  m - number of local rows (Cannot be PETSC_DECIDE)
3021 .  n - This value should be the same as the local size used in creating the
3022        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3023        calculated if N is given) For square matrices n is almost always m.
3024 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3025 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3026 .   i - row indices
3027 .   j - column indices
3028 -   a - matrix values
3029 
3030    Output Parameter:
3031 .   mat - the matrix
3032 
3033    Level: intermediate
3034 
3035    Notes:
3036        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3037      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3038      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3039 
3040        The i and j indices are 0 based
3041 
3042 .keywords: matrix, aij, compressed row, sparse, parallel
3043 
3044 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3045           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3046 @*/
3047 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3048 {
3049   PetscErrorCode ierr;
3050 
3051  PetscFunctionBegin;
3052   if (i[0]) {
3053     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3054   }
3055   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3056   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3057   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3058   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 #undef __FUNCT__
3063 #define __FUNCT__ "MatCreateMPIAIJ"
3064 /*@C
3065    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3066    (the default parallel PETSc format).  For good matrix assembly performance
3067    the user should preallocate the matrix storage by setting the parameters
3068    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3069    performance can be increased by more than a factor of 50.
3070 
3071    Collective on MPI_Comm
3072 
3073    Input Parameters:
3074 +  comm - MPI communicator
3075 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3076            This value should be the same as the local size used in creating the
3077            y vector for the matrix-vector product y = Ax.
3078 .  n - This value should be the same as the local size used in creating the
3079        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3080        calculated if N is given) For square matrices n is almost always m.
3081 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3082 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3083 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3084            (same value is used for all local rows)
3085 .  d_nnz - array containing the number of nonzeros in the various rows of the
3086            DIAGONAL portion of the local submatrix (possibly different for each row)
3087            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3088            The size of this array is equal to the number of local rows, i.e 'm'.
3089            You must leave room for the diagonal entry even if it is zero.
3090 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3091            submatrix (same value is used for all local rows).
3092 -  o_nnz - array containing the number of nonzeros in the various rows of the
3093            OFF-DIAGONAL portion of the local submatrix (possibly different for
3094            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3095            structure. The size of this array is equal to the number
3096            of local rows, i.e 'm'.
3097 
3098    Output Parameter:
3099 .  A - the matrix
3100 
3101    Notes:
3102    If the *_nnz parameter is given then the *_nz parameter is ignored
3103 
3104    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3105    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3106    storage requirements for this matrix.
3107 
3108    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3109    processor than it must be used on all processors that share the object for
3110    that argument.
3111 
3112    The user MUST specify either the local or global matrix dimensions
3113    (possibly both).
3114 
3115    The parallel matrix is partitioned such that the first m0 rows belong to
3116    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3117    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3118 
3119    The DIAGONAL portion of the local submatrix of a processor can be defined
3120    as the submatrix which is obtained by extraction the part corresponding
3121    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3122    first row that belongs to the processor, and r2 is the last row belonging
3123    to the this processor. This is a square mxm matrix. The remaining portion
3124    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3125 
3126    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3127 
3128    When calling this routine with a single process communicator, a matrix of
3129    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3130    type of communicator, use the construction mechanism:
3131      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3132 
3133    By default, this format uses inodes (identical nodes) when possible.
3134    We search for consecutive rows with the same nonzero structure, thereby
3135    reusing matrix information to achieve increased efficiency.
3136 
3137    Options Database Keys:
3138 +  -mat_no_inode  - Do not use inodes
3139 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3140 -  -mat_aij_oneindex - Internally use indexing starting at 1
3141         rather than 0.  Note that when calling MatSetValues(),
3142         the user still MUST index entries starting at 0!
3143 
3144 
3145    Example usage:
3146 
3147    Consider the following 8x8 matrix with 34 non-zero values, that is
3148    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3149    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3150    as follows:
3151 
3152 .vb
3153             1  2  0  |  0  3  0  |  0  4
3154     Proc0   0  5  6  |  7  0  0  |  8  0
3155             9  0 10  | 11  0  0  | 12  0
3156     -------------------------------------
3157            13  0 14  | 15 16 17  |  0  0
3158     Proc1   0 18  0  | 19 20 21  |  0  0
3159             0  0  0  | 22 23  0  | 24  0
3160     -------------------------------------
3161     Proc2  25 26 27  |  0  0 28  | 29  0
3162            30  0  0  | 31 32 33  |  0 34
3163 .ve
3164 
3165    This can be represented as a collection of submatrices as:
3166 
3167 .vb
3168       A B C
3169       D E F
3170       G H I
3171 .ve
3172 
3173    Where the submatrices A,B,C are owned by proc0, D,E,F are
3174    owned by proc1, G,H,I are owned by proc2.
3175 
3176    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3177    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3178    The 'M','N' parameters are 8,8, and have the same values on all procs.
3179 
3180    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3181    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3182    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3183    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3184    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3185    matrix, ans [DF] as another SeqAIJ matrix.
3186 
3187    When d_nz, o_nz parameters are specified, d_nz storage elements are
3188    allocated for every row of the local diagonal submatrix, and o_nz
3189    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3190    One way to choose d_nz and o_nz is to use the max nonzerors per local
3191    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3192    In this case, the values of d_nz,o_nz are:
3193 .vb
3194      proc0 : dnz = 2, o_nz = 2
3195      proc1 : dnz = 3, o_nz = 2
3196      proc2 : dnz = 1, o_nz = 4
3197 .ve
3198    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3199    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3200    for proc3. i.e we are using 12+15+10=37 storage locations to store
3201    34 values.
3202 
3203    When d_nnz, o_nnz parameters are specified, the storage is specified
3204    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3205    In the above case the values for d_nnz,o_nnz are:
3206 .vb
3207      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3208      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3209      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3210 .ve
3211    Here the space allocated is sum of all the above values i.e 34, and
3212    hence pre-allocation is perfect.
3213 
3214    Level: intermediate
3215 
3216 .keywords: matrix, aij, compressed row, sparse, parallel
3217 
3218 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3219           MPIAIJ, MatCreateMPIAIJWithArrays()
3220 @*/
3221 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3222 {
3223   PetscErrorCode ierr;
3224   PetscMPIInt    size;
3225 
3226   PetscFunctionBegin;
3227   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3228   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3229   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3230   if (size > 1) {
3231     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3232     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3233   } else {
3234     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3235     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3236   }
3237   PetscFunctionReturn(0);
3238 }
3239 
3240 #undef __FUNCT__
3241 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3242 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3243 {
3244   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3245 
3246   PetscFunctionBegin;
3247   *Ad     = a->A;
3248   *Ao     = a->B;
3249   *colmap = a->garray;
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 #undef __FUNCT__
3254 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3255 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3256 {
3257   PetscErrorCode ierr;
3258   PetscInt       i;
3259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3260 
3261   PetscFunctionBegin;
3262   if (coloring->ctype == IS_COLORING_GLOBAL) {
3263     ISColoringValue *allcolors,*colors;
3264     ISColoring      ocoloring;
3265 
3266     /* set coloring for diagonal portion */
3267     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3268 
3269     /* set coloring for off-diagonal portion */
3270     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3271     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3272     for (i=0; i<a->B->cmap.n; i++) {
3273       colors[i] = allcolors[a->garray[i]];
3274     }
3275     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3276     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3277     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3278     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3279   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3280     ISColoringValue *colors;
3281     PetscInt        *larray;
3282     ISColoring      ocoloring;
3283 
3284     /* set coloring for diagonal portion */
3285     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3286     for (i=0; i<a->A->cmap.n; i++) {
3287       larray[i] = i + A->cmap.rstart;
3288     }
3289     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3290     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3291     for (i=0; i<a->A->cmap.n; i++) {
3292       colors[i] = coloring->colors[larray[i]];
3293     }
3294     ierr = PetscFree(larray);CHKERRQ(ierr);
3295     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3296     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3297     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3298 
3299     /* set coloring for off-diagonal portion */
3300     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3301     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3302     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3303     for (i=0; i<a->B->cmap.n; i++) {
3304       colors[i] = coloring->colors[larray[i]];
3305     }
3306     ierr = PetscFree(larray);CHKERRQ(ierr);
3307     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3308     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3309     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3310   } else {
3311     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3312   }
3313 
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 #if defined(PETSC_HAVE_ADIC)
3318 #undef __FUNCT__
3319 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3320 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3321 {
3322   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3323   PetscErrorCode ierr;
3324 
3325   PetscFunctionBegin;
3326   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3327   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3328   PetscFunctionReturn(0);
3329 }
3330 #endif
3331 
3332 #undef __FUNCT__
3333 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3334 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3335 {
3336   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3337   PetscErrorCode ierr;
3338 
3339   PetscFunctionBegin;
3340   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3341   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3342   PetscFunctionReturn(0);
3343 }
3344 
3345 #undef __FUNCT__
3346 #define __FUNCT__ "MatMerge"
3347 /*@C
3348       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3349                  matrices from each processor
3350 
3351     Collective on MPI_Comm
3352 
3353    Input Parameters:
3354 +    comm - the communicators the parallel matrix will live on
3355 .    inmat - the input sequential matrices
3356 .    n - number of local columns (or PETSC_DECIDE)
3357 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3358 
3359    Output Parameter:
3360 .    outmat - the parallel matrix generated
3361 
3362     Level: advanced
3363 
3364    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3365 
3366 @*/
3367 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3368 {
3369   PetscErrorCode ierr;
3370   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3371   PetscInt       *indx;
3372   PetscScalar    *values;
3373 
3374   PetscFunctionBegin;
3375   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3376   if (scall == MAT_INITIAL_MATRIX){
3377     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3378     if (n == PETSC_DECIDE){
3379       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3380     }
3381     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3382     rstart -= m;
3383 
3384     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3385     for (i=0;i<m;i++) {
3386       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3387       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3388       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3389     }
3390     /* This routine will ONLY return MPIAIJ type matrix */
3391     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3392     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3393     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3394     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3395     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3396 
3397   } else if (scall == MAT_REUSE_MATRIX){
3398     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3399   } else {
3400     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3401   }
3402 
3403   for (i=0;i<m;i++) {
3404     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3405     Ii    = i + rstart;
3406     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3407     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3408   }
3409   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3410   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3411   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3412 
3413   PetscFunctionReturn(0);
3414 }
3415 
3416 #undef __FUNCT__
3417 #define __FUNCT__ "MatFileSplit"
3418 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3419 {
3420   PetscErrorCode    ierr;
3421   PetscMPIInt       rank;
3422   PetscInt          m,N,i,rstart,nnz;
3423   size_t            len;
3424   const PetscInt    *indx;
3425   PetscViewer       out;
3426   char              *name;
3427   Mat               B;
3428   const PetscScalar *values;
3429 
3430   PetscFunctionBegin;
3431   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3432   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3433   /* Should this be the type of the diagonal block of A? */
3434   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3435   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3436   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3437   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3438   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3439   for (i=0;i<m;i++) {
3440     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3441     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3442     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3443   }
3444   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3445   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3446 
3447   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3448   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3449   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3450   sprintf(name,"%s.%d",outfile,rank);
3451   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3452   ierr = PetscFree(name);
3453   ierr = MatView(B,out);CHKERRQ(ierr);
3454   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3455   ierr = MatDestroy(B);CHKERRQ(ierr);
3456   PetscFunctionReturn(0);
3457 }
3458 
3459 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3460 #undef __FUNCT__
3461 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3462 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3463 {
3464   PetscErrorCode       ierr;
3465   Mat_Merge_SeqsToMPI  *merge;
3466   PetscContainer       container;
3467 
3468   PetscFunctionBegin;
3469   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3470   if (container) {
3471     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3472     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3473     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3474     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3475     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3476     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3477     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3478     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3479     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3480     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3481     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3482     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3483 
3484     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3485     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3486   }
3487   ierr = PetscFree(merge);CHKERRQ(ierr);
3488 
3489   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3490   PetscFunctionReturn(0);
3491 }
3492 
3493 #include "src/mat/utils/freespace.h"
3494 #include "petscbt.h"
3495 static PetscEvent logkey_seqstompinum = 0;
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3498 /*@C
3499       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3500                  matrices from each processor
3501 
3502     Collective on MPI_Comm
3503 
3504    Input Parameters:
3505 +    comm - the communicators the parallel matrix will live on
3506 .    seqmat - the input sequential matrices
3507 .    m - number of local rows (or PETSC_DECIDE)
3508 .    n - number of local columns (or PETSC_DECIDE)
3509 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3510 
3511    Output Parameter:
3512 .    mpimat - the parallel matrix generated
3513 
3514     Level: advanced
3515 
3516    Notes:
3517      The dimensions of the sequential matrix in each processor MUST be the same.
3518      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3519      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3520 @*/
3521 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3522 {
3523   PetscErrorCode       ierr;
3524   MPI_Comm             comm=mpimat->comm;
3525   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3526   PetscMPIInt          size,rank,taga,*len_s;
3527   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3528   PetscInt             proc,m;
3529   PetscInt             **buf_ri,**buf_rj;
3530   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3531   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3532   MPI_Request          *s_waits,*r_waits;
3533   MPI_Status           *status;
3534   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3535   Mat_Merge_SeqsToMPI  *merge;
3536   PetscContainer       container;
3537 
3538   PetscFunctionBegin;
3539   if (!logkey_seqstompinum) {
3540     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3541   }
3542   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3543 
3544   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3545   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3546 
3547   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3548   if (container) {
3549     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3550   }
3551   bi     = merge->bi;
3552   bj     = merge->bj;
3553   buf_ri = merge->buf_ri;
3554   buf_rj = merge->buf_rj;
3555 
3556   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3557   owners = merge->rowmap.range;
3558   len_s  = merge->len_s;
3559 
3560   /* send and recv matrix values */
3561   /*-----------------------------*/
3562   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3563   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3564 
3565   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3566   for (proc=0,k=0; proc<size; proc++){
3567     if (!len_s[proc]) continue;
3568     i = owners[proc];
3569     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3570     k++;
3571   }
3572 
3573   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3574   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3575   ierr = PetscFree(status);CHKERRQ(ierr);
3576 
3577   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3578   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3579 
3580   /* insert mat values of mpimat */
3581   /*----------------------------*/
3582   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3583   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3584   nextrow = buf_ri_k + merge->nrecv;
3585   nextai  = nextrow + merge->nrecv;
3586 
3587   for (k=0; k<merge->nrecv; k++){
3588     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3589     nrows = *(buf_ri_k[k]);
3590     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3591     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3592   }
3593 
3594   /* set values of ba */
3595   m = merge->rowmap.n;
3596   for (i=0; i<m; i++) {
3597     arow = owners[rank] + i;
3598     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3599     bnzi = bi[i+1] - bi[i];
3600     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3601 
3602     /* add local non-zero vals of this proc's seqmat into ba */
3603     anzi = ai[arow+1] - ai[arow];
3604     aj   = a->j + ai[arow];
3605     aa   = a->a + ai[arow];
3606     nextaj = 0;
3607     for (j=0; nextaj<anzi; j++){
3608       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3609         ba_i[j] += aa[nextaj++];
3610       }
3611     }
3612 
3613     /* add received vals into ba */
3614     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3615       /* i-th row */
3616       if (i == *nextrow[k]) {
3617         anzi = *(nextai[k]+1) - *nextai[k];
3618         aj   = buf_rj[k] + *(nextai[k]);
3619         aa   = abuf_r[k] + *(nextai[k]);
3620         nextaj = 0;
3621         for (j=0; nextaj<anzi; j++){
3622           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3623             ba_i[j] += aa[nextaj++];
3624           }
3625         }
3626         nextrow[k]++; nextai[k]++;
3627       }
3628     }
3629     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3630   }
3631   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633 
3634   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3635   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3636   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3637   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3638   PetscFunctionReturn(0);
3639 }
3640 
3641 static PetscEvent logkey_seqstompisym = 0;
3642 #undef __FUNCT__
3643 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3644 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3645 {
3646   PetscErrorCode       ierr;
3647   Mat                  B_mpi;
3648   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3649   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3650   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3651   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3652   PetscInt             len,proc,*dnz,*onz;
3653   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3654   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3655   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3656   MPI_Status           *status;
3657   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3658   PetscBT              lnkbt;
3659   Mat_Merge_SeqsToMPI  *merge;
3660   PetscContainer       container;
3661 
3662   PetscFunctionBegin;
3663   if (!logkey_seqstompisym) {
3664     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3665   }
3666   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3667 
3668   /* make sure it is a PETSc comm */
3669   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3670   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3671   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3672 
3673   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3674   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3675 
3676   /* determine row ownership */
3677   /*---------------------------------------------------------*/
3678   merge->rowmap.n = m;
3679   merge->rowmap.N = M;
3680   merge->rowmap.bs = 1;
3681   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3682   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3683   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3684 
3685   m      = merge->rowmap.n;
3686   M      = merge->rowmap.N;
3687   owners = merge->rowmap.range;
3688 
3689   /* determine the number of messages to send, their lengths */
3690   /*---------------------------------------------------------*/
3691   len_s  = merge->len_s;
3692 
3693   len = 0;  /* length of buf_si[] */
3694   merge->nsend = 0;
3695   for (proc=0; proc<size; proc++){
3696     len_si[proc] = 0;
3697     if (proc == rank){
3698       len_s[proc] = 0;
3699     } else {
3700       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3701       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3702     }
3703     if (len_s[proc]) {
3704       merge->nsend++;
3705       nrows = 0;
3706       for (i=owners[proc]; i<owners[proc+1]; i++){
3707         if (ai[i+1] > ai[i]) nrows++;
3708       }
3709       len_si[proc] = 2*(nrows+1);
3710       len += len_si[proc];
3711     }
3712   }
3713 
3714   /* determine the number and length of messages to receive for ij-structure */
3715   /*-------------------------------------------------------------------------*/
3716   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3717   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3718 
3719   /* post the Irecv of j-structure */
3720   /*-------------------------------*/
3721   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3722   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3723 
3724   /* post the Isend of j-structure */
3725   /*--------------------------------*/
3726   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3727   sj_waits = si_waits + merge->nsend;
3728 
3729   for (proc=0, k=0; proc<size; proc++){
3730     if (!len_s[proc]) continue;
3731     i = owners[proc];
3732     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3733     k++;
3734   }
3735 
3736   /* receives and sends of j-structure are complete */
3737   /*------------------------------------------------*/
3738   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3739   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3740 
3741   /* send and recv i-structure */
3742   /*---------------------------*/
3743   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3744   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3745 
3746   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3747   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3748   for (proc=0,k=0; proc<size; proc++){
3749     if (!len_s[proc]) continue;
3750     /* form outgoing message for i-structure:
3751          buf_si[0]:                 nrows to be sent
3752                [1:nrows]:           row index (global)
3753                [nrows+1:2*nrows+1]: i-structure index
3754     */
3755     /*-------------------------------------------*/
3756     nrows = len_si[proc]/2 - 1;
3757     buf_si_i    = buf_si + nrows+1;
3758     buf_si[0]   = nrows;
3759     buf_si_i[0] = 0;
3760     nrows = 0;
3761     for (i=owners[proc]; i<owners[proc+1]; i++){
3762       anzi = ai[i+1] - ai[i];
3763       if (anzi) {
3764         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3765         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3766         nrows++;
3767       }
3768     }
3769     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3770     k++;
3771     buf_si += len_si[proc];
3772   }
3773 
3774   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3775   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3776 
3777   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3778   for (i=0; i<merge->nrecv; i++){
3779     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3780   }
3781 
3782   ierr = PetscFree(len_si);CHKERRQ(ierr);
3783   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3784   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3785   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3786   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3787   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3788   ierr = PetscFree(status);CHKERRQ(ierr);
3789 
3790   /* compute a local seq matrix in each processor */
3791   /*----------------------------------------------*/
3792   /* allocate bi array and free space for accumulating nonzero column info */
3793   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3794   bi[0] = 0;
3795 
3796   /* create and initialize a linked list */
3797   nlnk = N+1;
3798   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3799 
3800   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3801   len = 0;
3802   len  = ai[owners[rank+1]] - ai[owners[rank]];
3803   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3804   current_space = free_space;
3805 
3806   /* determine symbolic info for each local row */
3807   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3808   nextrow = buf_ri_k + merge->nrecv;
3809   nextai  = nextrow + merge->nrecv;
3810   for (k=0; k<merge->nrecv; k++){
3811     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3812     nrows = *buf_ri_k[k];
3813     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3814     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3815   }
3816 
3817   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3818   len = 0;
3819   for (i=0;i<m;i++) {
3820     bnzi   = 0;
3821     /* add local non-zero cols of this proc's seqmat into lnk */
3822     arow   = owners[rank] + i;
3823     anzi   = ai[arow+1] - ai[arow];
3824     aj     = a->j + ai[arow];
3825     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3826     bnzi += nlnk;
3827     /* add received col data into lnk */
3828     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3829       if (i == *nextrow[k]) { /* i-th row */
3830         anzi = *(nextai[k]+1) - *nextai[k];
3831         aj   = buf_rj[k] + *nextai[k];
3832         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3833         bnzi += nlnk;
3834         nextrow[k]++; nextai[k]++;
3835       }
3836     }
3837     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3838 
3839     /* if free space is not available, make more free space */
3840     if (current_space->local_remaining<bnzi) {
3841       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3842       nspacedouble++;
3843     }
3844     /* copy data into free space, then initialize lnk */
3845     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3846     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3847 
3848     current_space->array           += bnzi;
3849     current_space->local_used      += bnzi;
3850     current_space->local_remaining -= bnzi;
3851 
3852     bi[i+1] = bi[i] + bnzi;
3853   }
3854 
3855   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3856 
3857   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3858   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3859   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3860 
3861   /* create symbolic parallel matrix B_mpi */
3862   /*---------------------------------------*/
3863   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3864   if (n==PETSC_DECIDE) {
3865     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3866   } else {
3867     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3868   }
3869   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3870   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3871   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3872 
3873   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3874   B_mpi->assembled     = PETSC_FALSE;
3875   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3876   merge->bi            = bi;
3877   merge->bj            = bj;
3878   merge->buf_ri        = buf_ri;
3879   merge->buf_rj        = buf_rj;
3880   merge->coi           = PETSC_NULL;
3881   merge->coj           = PETSC_NULL;
3882   merge->owners_co     = PETSC_NULL;
3883 
3884   /* attach the supporting struct to B_mpi for reuse */
3885   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3886   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3887   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3888   *mpimat = B_mpi;
3889 
3890   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3891   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3892   PetscFunctionReturn(0);
3893 }
3894 
3895 static PetscEvent logkey_seqstompi = 0;
3896 #undef __FUNCT__
3897 #define __FUNCT__ "MatMerge_SeqsToMPI"
3898 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3899 {
3900   PetscErrorCode   ierr;
3901 
3902   PetscFunctionBegin;
3903   if (!logkey_seqstompi) {
3904     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3905   }
3906   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3907   if (scall == MAT_INITIAL_MATRIX){
3908     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3909   }
3910   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3911   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3912   PetscFunctionReturn(0);
3913 }
3914 static PetscEvent logkey_getlocalmat = 0;
3915 #undef __FUNCT__
3916 #define __FUNCT__ "MatGetLocalMat"
3917 /*@C
3918      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3919 
3920     Not Collective
3921 
3922    Input Parameters:
3923 +    A - the matrix
3924 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3925 
3926    Output Parameter:
3927 .    A_loc - the local sequential matrix generated
3928 
3929     Level: developer
3930 
3931 @*/
3932 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3933 {
3934   PetscErrorCode  ierr;
3935   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3936   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3937   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3938   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3939   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3940   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3941 
3942   PetscFunctionBegin;
3943   if (!logkey_getlocalmat) {
3944     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3945   }
3946   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3947   if (scall == MAT_INITIAL_MATRIX){
3948     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3949     ci[0] = 0;
3950     for (i=0; i<am; i++){
3951       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3952     }
3953     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3954     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3955     k = 0;
3956     for (i=0; i<am; i++) {
3957       ncols_o = bi[i+1] - bi[i];
3958       ncols_d = ai[i+1] - ai[i];
3959       /* off-diagonal portion of A */
3960       for (jo=0; jo<ncols_o; jo++) {
3961         col = cmap[*bj];
3962         if (col >= cstart) break;
3963         cj[k]   = col; bj++;
3964         ca[k++] = *ba++;
3965       }
3966       /* diagonal portion of A */
3967       for (j=0; j<ncols_d; j++) {
3968         cj[k]   = cstart + *aj++;
3969         ca[k++] = *aa++;
3970       }
3971       /* off-diagonal portion of A */
3972       for (j=jo; j<ncols_o; j++) {
3973         cj[k]   = cmap[*bj++];
3974         ca[k++] = *ba++;
3975       }
3976     }
3977     /* put together the new matrix */
3978     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3979     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3980     /* Since these are PETSc arrays, change flags to free them as necessary. */
3981     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3982     mat->free_a  = PETSC_TRUE;
3983     mat->free_ij = PETSC_TRUE;
3984     mat->nonew   = 0;
3985   } else if (scall == MAT_REUSE_MATRIX){
3986     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3987     ci = mat->i; cj = mat->j; ca = mat->a;
3988     for (i=0; i<am; i++) {
3989       /* off-diagonal portion of A */
3990       ncols_o = bi[i+1] - bi[i];
3991       for (jo=0; jo<ncols_o; jo++) {
3992         col = cmap[*bj];
3993         if (col >= cstart) break;
3994         *ca++ = *ba++; bj++;
3995       }
3996       /* diagonal portion of A */
3997       ncols_d = ai[i+1] - ai[i];
3998       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3999       /* off-diagonal portion of A */
4000       for (j=jo; j<ncols_o; j++) {
4001         *ca++ = *ba++; bj++;
4002       }
4003     }
4004   } else {
4005     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4006   }
4007 
4008   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4009   PetscFunctionReturn(0);
4010 }
4011 
4012 static PetscEvent logkey_getlocalmatcondensed = 0;
4013 #undef __FUNCT__
4014 #define __FUNCT__ "MatGetLocalMatCondensed"
4015 /*@C
4016      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4017 
4018     Not Collective
4019 
4020    Input Parameters:
4021 +    A - the matrix
4022 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4023 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4024 
4025    Output Parameter:
4026 .    A_loc - the local sequential matrix generated
4027 
4028     Level: developer
4029 
4030 @*/
4031 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4032 {
4033   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4034   PetscErrorCode    ierr;
4035   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4036   IS                isrowa,iscola;
4037   Mat               *aloc;
4038 
4039   PetscFunctionBegin;
4040   if (!logkey_getlocalmatcondensed) {
4041     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4042   }
4043   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4044   if (!row){
4045     start = A->rmap.rstart; end = A->rmap.rend;
4046     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4047   } else {
4048     isrowa = *row;
4049   }
4050   if (!col){
4051     start = A->cmap.rstart;
4052     cmap  = a->garray;
4053     nzA   = a->A->cmap.n;
4054     nzB   = a->B->cmap.n;
4055     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4056     ncols = 0;
4057     for (i=0; i<nzB; i++) {
4058       if (cmap[i] < start) idx[ncols++] = cmap[i];
4059       else break;
4060     }
4061     imark = i;
4062     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4063     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4064     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4065     ierr = PetscFree(idx);CHKERRQ(ierr);
4066   } else {
4067     iscola = *col;
4068   }
4069   if (scall != MAT_INITIAL_MATRIX){
4070     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4071     aloc[0] = *A_loc;
4072   }
4073   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4074   *A_loc = aloc[0];
4075   ierr = PetscFree(aloc);CHKERRQ(ierr);
4076   if (!row){
4077     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4078   }
4079   if (!col){
4080     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4081   }
4082   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4083   PetscFunctionReturn(0);
4084 }
4085 
4086 static PetscEvent logkey_GetBrowsOfAcols = 0;
4087 #undef __FUNCT__
4088 #define __FUNCT__ "MatGetBrowsOfAcols"
4089 /*@C
4090     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4091 
4092     Collective on Mat
4093 
4094    Input Parameters:
4095 +    A,B - the matrices in mpiaij format
4096 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4097 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4098 
4099    Output Parameter:
4100 +    rowb, colb - index sets of rows and columns of B to extract
4101 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
4102 -    B_seq - the sequential matrix generated
4103 
4104     Level: developer
4105 
4106 @*/
4107 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4108 {
4109   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4110   PetscErrorCode    ierr;
4111   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4112   IS                isrowb,iscolb;
4113   Mat               *bseq;
4114 
4115   PetscFunctionBegin;
4116   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4117     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4118   }
4119   if (!logkey_GetBrowsOfAcols) {
4120     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
4121   }
4122   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4123 
4124   if (scall == MAT_INITIAL_MATRIX){
4125     start = A->cmap.rstart;
4126     cmap  = a->garray;
4127     nzA   = a->A->cmap.n;
4128     nzB   = a->B->cmap.n;
4129     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4130     ncols = 0;
4131     for (i=0; i<nzB; i++) {  /* row < local row index */
4132       if (cmap[i] < start) idx[ncols++] = cmap[i];
4133       else break;
4134     }
4135     imark = i;
4136     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4137     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4138     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4139     ierr = PetscFree(idx);CHKERRQ(ierr);
4140     *brstart = imark;
4141     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4142   } else {
4143     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4144     isrowb = *rowb; iscolb = *colb;
4145     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4146     bseq[0] = *B_seq;
4147   }
4148   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4149   *B_seq = bseq[0];
4150   ierr = PetscFree(bseq);CHKERRQ(ierr);
4151   if (!rowb){
4152     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4153   } else {
4154     *rowb = isrowb;
4155   }
4156   if (!colb){
4157     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4158   } else {
4159     *colb = iscolb;
4160   }
4161   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4162   PetscFunctionReturn(0);
4163 }
4164 
4165 static PetscEvent logkey_GetBrowsOfAocols = 0;
4166 #undef __FUNCT__
4167 #define __FUNCT__ "MatGetBrowsOfAoCols"
4168 /*@C
4169     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4170     of the OFF-DIAGONAL portion of local A
4171 
4172     Collective on Mat
4173 
4174    Input Parameters:
4175 +    A,B - the matrices in mpiaij format
4176 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4177 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4178 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4179 
4180    Output Parameter:
4181 +    B_oth - the sequential matrix generated
4182 
4183     Level: developer
4184 
4185 @*/
4186 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4187 {
4188   VecScatter_MPI_General *gen_to,*gen_from;
4189   PetscErrorCode         ierr;
4190   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4191   Mat_SeqAIJ             *b_oth;
4192   VecScatter             ctx=a->Mvctx;
4193   MPI_Comm               comm=ctx->comm;
4194   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4195   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4196   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4197   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4198   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4199   MPI_Status             *sstatus,rstatus;
4200   PetscInt               *cols,sbs,rbs;
4201   PetscScalar            *vals;
4202 
4203   PetscFunctionBegin;
4204   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4205     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4206   }
4207   if (!logkey_GetBrowsOfAocols) {
4208     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
4209   }
4210   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4211   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4212 
4213   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4214   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4215   rvalues  = gen_from->values; /* holds the length of receiving row */
4216   svalues  = gen_to->values;   /* holds the length of sending row */
4217   nrecvs   = gen_from->n;
4218   nsends   = gen_to->n;
4219 
4220   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4221   srow     = gen_to->indices;   /* local row index to be sent */
4222   sstarts  = gen_to->starts;
4223   sprocs   = gen_to->procs;
4224   sstatus  = gen_to->sstatus;
4225   sbs      = gen_to->bs;
4226   rstarts  = gen_from->starts;
4227   rprocs   = gen_from->procs;
4228   rbs      = gen_from->bs;
4229 
4230   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4231   if (scall == MAT_INITIAL_MATRIX){
4232     /* i-array */
4233     /*---------*/
4234     /*  post receives */
4235     for (i=0; i<nrecvs; i++){
4236       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4237       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4238       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4239     }
4240 
4241     /* pack the outgoing message */
4242     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4243     rstartsj = sstartsj + nsends +1;
4244     sstartsj[0] = 0;  rstartsj[0] = 0;
4245     len = 0; /* total length of j or a array to be sent */
4246     k = 0;
4247     for (i=0; i<nsends; i++){
4248       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4249       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4250       for (j=0; j<nrows; j++) {
4251         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4252         for (l=0; l<sbs; l++){
4253           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4254           rowlen[j*sbs+l] = ncols;
4255           len += ncols;
4256           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4257         }
4258         k++;
4259       }
4260       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4261       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4262     }
4263     /* recvs and sends of i-array are completed */
4264     i = nrecvs;
4265     while (i--) {
4266       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4267     }
4268     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4269 
4270     /* allocate buffers for sending j and a arrays */
4271     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4272     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4273 
4274     /* create i-array of B_oth */
4275     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4276     b_othi[0] = 0;
4277     len = 0; /* total length of j or a array to be received */
4278     k = 0;
4279     for (i=0; i<nrecvs; i++){
4280       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4281       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4282       for (j=0; j<nrows; j++) {
4283         b_othi[k+1] = b_othi[k] + rowlen[j];
4284         len += rowlen[j]; k++;
4285       }
4286       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4287     }
4288 
4289     /* allocate space for j and a arrrays of B_oth */
4290     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4291     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4292 
4293     /* j-array */
4294     /*---------*/
4295     /*  post receives of j-array */
4296     for (i=0; i<nrecvs; i++){
4297       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4298       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4299     }
4300 
4301     /* pack the outgoing message j-array */
4302     k = 0;
4303     for (i=0; i<nsends; i++){
4304       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4305       bufJ = bufj+sstartsj[i];
4306       for (j=0; j<nrows; j++) {
4307         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4308         for (ll=0; ll<sbs; ll++){
4309           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4310           for (l=0; l<ncols; l++){
4311             *bufJ++ = cols[l];
4312           }
4313           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4314         }
4315       }
4316       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4317     }
4318 
4319     /* recvs and sends of j-array are completed */
4320     i = nrecvs;
4321     while (i--) {
4322       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4323     }
4324     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4325   } else if (scall == MAT_REUSE_MATRIX){
4326     sstartsj = *startsj;
4327     rstartsj = sstartsj + nsends +1;
4328     bufa     = *bufa_ptr;
4329     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4330     b_otha   = b_oth->a;
4331   } else {
4332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4333   }
4334 
4335   /* a-array */
4336   /*---------*/
4337   /*  post receives of a-array */
4338   for (i=0; i<nrecvs; i++){
4339     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4340     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4341   }
4342 
4343   /* pack the outgoing message a-array */
4344   k = 0;
4345   for (i=0; i<nsends; i++){
4346     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4347     bufA = bufa+sstartsj[i];
4348     for (j=0; j<nrows; j++) {
4349       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4350       for (ll=0; ll<sbs; ll++){
4351         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4352         for (l=0; l<ncols; l++){
4353           *bufA++ = vals[l];
4354         }
4355         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4356       }
4357     }
4358     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4359   }
4360   /* recvs and sends of a-array are completed */
4361   i = nrecvs;
4362   while (i--) {
4363     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4364   }
4365   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4366   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4367 
4368   if (scall == MAT_INITIAL_MATRIX){
4369     /* put together the new matrix */
4370     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4371 
4372     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4373     /* Since these are PETSc arrays, change flags to free them as necessary. */
4374     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4375     b_oth->free_a  = PETSC_TRUE;
4376     b_oth->free_ij = PETSC_TRUE;
4377     b_oth->nonew   = 0;
4378 
4379     ierr = PetscFree(bufj);CHKERRQ(ierr);
4380     if (!startsj || !bufa_ptr){
4381       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4382       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4383     } else {
4384       *startsj  = sstartsj;
4385       *bufa_ptr = bufa;
4386     }
4387   }
4388   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4389   PetscFunctionReturn(0);
4390 }
4391 
4392 #undef __FUNCT__
4393 #define __FUNCT__ "MatGetCommunicationStructs"
4394 /*@C
4395   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4396 
4397   Not Collective
4398 
4399   Input Parameters:
4400 . A - The matrix in mpiaij format
4401 
4402   Output Parameter:
4403 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4404 . colmap - A map from global column index to local index into lvec
4405 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4406 
4407   Level: developer
4408 
4409 @*/
4410 #if defined (PETSC_USE_CTABLE)
4411 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4412 #else
4413 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4414 #endif
4415 {
4416   Mat_MPIAIJ *a;
4417 
4418   PetscFunctionBegin;
4419   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4420   PetscValidPointer(lvec, 2)
4421   PetscValidPointer(colmap, 3)
4422   PetscValidPointer(multScatter, 4)
4423   a = (Mat_MPIAIJ *) A->data;
4424   if (lvec) *lvec = a->lvec;
4425   if (colmap) *colmap = a->colmap;
4426   if (multScatter) *multScatter = a->Mvctx;
4427   PetscFunctionReturn(0);
4428 }
4429 
4430 EXTERN_C_BEGIN
4431 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4432 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4433 EXTERN_C_END
4434 
4435 /*MC
4436    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4437 
4438    Options Database Keys:
4439 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4440 
4441   Level: beginner
4442 
4443 .seealso: MatCreateMPIAIJ
4444 M*/
4445 
4446 EXTERN_C_BEGIN
4447 #undef __FUNCT__
4448 #define __FUNCT__ "MatCreate_MPIAIJ"
4449 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4450 {
4451   Mat_MPIAIJ     *b;
4452   PetscErrorCode ierr;
4453   PetscMPIInt    size;
4454 
4455   PetscFunctionBegin;
4456   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4457 
4458   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4459   B->data         = (void*)b;
4460   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4461   B->factor       = 0;
4462   B->rmap.bs      = 1;
4463   B->assembled    = PETSC_FALSE;
4464   B->mapping      = 0;
4465 
4466   B->insertmode      = NOT_SET_VALUES;
4467   b->size            = size;
4468   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4469 
4470   /* build cache for off array entries formed */
4471   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4472   b->donotstash  = PETSC_FALSE;
4473   b->colmap      = 0;
4474   b->garray      = 0;
4475   b->roworiented = PETSC_TRUE;
4476 
4477   /* stuff used for matrix vector multiply */
4478   b->lvec      = PETSC_NULL;
4479   b->Mvctx     = PETSC_NULL;
4480 
4481   /* stuff for MatGetRow() */
4482   b->rowindices   = 0;
4483   b->rowvalues    = 0;
4484   b->getrowactive = PETSC_FALSE;
4485 
4486 
4487   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4488                                      "MatStoreValues_MPIAIJ",
4489                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4490   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4491                                      "MatRetrieveValues_MPIAIJ",
4492                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4494 				     "MatGetDiagonalBlock_MPIAIJ",
4495                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4497 				     "MatIsTranspose_MPIAIJ",
4498 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4499   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4500 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4501 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4503 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4504 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4505   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4506 				     "MatDiagonalScaleLocal_MPIAIJ",
4507 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4509                                      "MatConvert_MPIAIJ_MPICSRPERM",
4510                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4512                                      "MatConvert_MPIAIJ_MPICRL",
4513                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4514   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4515   PetscFunctionReturn(0);
4516 }
4517 EXTERN_C_END
4518 
4519 #undef __FUNCT__
4520 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4521 /*@C
4522      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4523          and "off-diagonal" part of the matrix in CSR format.
4524 
4525    Collective on MPI_Comm
4526 
4527    Input Parameters:
4528 +  comm - MPI communicator
4529 .  m - number of local rows (Cannot be PETSC_DECIDE)
4530 .  n - This value should be the same as the local size used in creating the
4531        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4532        calculated if N is given) For square matrices n is almost always m.
4533 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4534 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4535 .   i - row indices for "diagonal" portion of matrix
4536 .   j - column indices
4537 .   a - matrix values
4538 .   oi - row indices for "off-diagonal" portion of matrix
4539 .   oj - column indices
4540 -   oa - matrix values
4541 
4542    Output Parameter:
4543 .   mat - the matrix
4544 
4545    Level: advanced
4546 
4547    Notes:
4548        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4549 
4550        The i and j indices are 0 based
4551 
4552        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4553 
4554 
4555 .keywords: matrix, aij, compressed row, sparse, parallel
4556 
4557 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4558           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4559 @*/
4560 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4561 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4562 {
4563   PetscErrorCode ierr;
4564   Mat_MPIAIJ     *maij;
4565 
4566  PetscFunctionBegin;
4567   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4568   if (i[0]) {
4569     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4570   }
4571   if (oi[0]) {
4572     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4573   }
4574   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4575   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4576   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4577   maij = (Mat_MPIAIJ*) (*mat)->data;
4578   maij->donotstash     = PETSC_TRUE;
4579   (*mat)->preallocated = PETSC_TRUE;
4580 
4581   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4582   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr);
4583   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr);
4584 
4585   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4586   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4587 
4588   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4589   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4590   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4591   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4592 
4593   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4594   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4595   PetscFunctionReturn(0);
4596 }
4597 
4598 /*
4599     Special version for direct calls from Fortran
4600 */
4601 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4602 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4603 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4604 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4605 #endif
4606 
4607 /* Change these macros so can be used in void function */
4608 #undef CHKERRQ
4609 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4610 #undef SETERRQ2
4611 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4612 #undef SETERRQ
4613 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4614 
4615 EXTERN_C_BEGIN
4616 #undef __FUNCT__
4617 #define __FUNCT__ "matsetvaluesmpiaij_"
4618 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4619 {
4620   Mat            mat = *mmat;
4621   PetscInt       m = *mm, n = *mn;
4622   InsertMode     addv = *maddv;
4623   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4624   PetscScalar    value;
4625   PetscErrorCode ierr;
4626 
4627   MatPreallocated(mat);
4628   if (mat->insertmode == NOT_SET_VALUES) {
4629     mat->insertmode = addv;
4630   }
4631 #if defined(PETSC_USE_DEBUG)
4632   else if (mat->insertmode != addv) {
4633     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4634   }
4635 #endif
4636   {
4637   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4638   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4639   PetscTruth     roworiented = aij->roworiented;
4640 
4641   /* Some Variables required in the macro */
4642   Mat            A = aij->A;
4643   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4644   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4645   PetscScalar    *aa = a->a;
4646   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4647   Mat            B = aij->B;
4648   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4649   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4650   PetscScalar    *ba = b->a;
4651 
4652   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4653   PetscInt       nonew = a->nonew;
4654   PetscScalar    *ap1,*ap2;
4655 
4656   PetscFunctionBegin;
4657   for (i=0; i<m; i++) {
4658     if (im[i] < 0) continue;
4659 #if defined(PETSC_USE_DEBUG)
4660     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4661 #endif
4662     if (im[i] >= rstart && im[i] < rend) {
4663       row      = im[i] - rstart;
4664       lastcol1 = -1;
4665       rp1      = aj + ai[row];
4666       ap1      = aa + ai[row];
4667       rmax1    = aimax[row];
4668       nrow1    = ailen[row];
4669       low1     = 0;
4670       high1    = nrow1;
4671       lastcol2 = -1;
4672       rp2      = bj + bi[row];
4673       ap2      = ba + bi[row];
4674       rmax2    = bimax[row];
4675       nrow2    = bilen[row];
4676       low2     = 0;
4677       high2    = nrow2;
4678 
4679       for (j=0; j<n; j++) {
4680         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4681         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4682         if (in[j] >= cstart && in[j] < cend){
4683           col = in[j] - cstart;
4684           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4685         } else if (in[j] < 0) continue;
4686 #if defined(PETSC_USE_DEBUG)
4687         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4688 #endif
4689         else {
4690           if (mat->was_assembled) {
4691             if (!aij->colmap) {
4692               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4693             }
4694 #if defined (PETSC_USE_CTABLE)
4695             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4696 	    col--;
4697 #else
4698             col = aij->colmap[in[j]] - 1;
4699 #endif
4700             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4701               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4702               col =  in[j];
4703               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4704               B = aij->B;
4705               b = (Mat_SeqAIJ*)B->data;
4706               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4707               rp2      = bj + bi[row];
4708               ap2      = ba + bi[row];
4709               rmax2    = bimax[row];
4710               nrow2    = bilen[row];
4711               low2     = 0;
4712               high2    = nrow2;
4713               bm       = aij->B->rmap.n;
4714               ba = b->a;
4715             }
4716           } else col = in[j];
4717           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4718         }
4719       }
4720     } else {
4721       if (!aij->donotstash) {
4722         if (roworiented) {
4723           if (ignorezeroentries && v[i*n] == 0.0) continue;
4724           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4725         } else {
4726           if (ignorezeroentries && v[i] == 0.0) continue;
4727           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4728         }
4729       }
4730     }
4731   }}
4732   PetscFunctionReturnVoid();
4733 }
4734 EXTERN_C_END
4735 
4736