xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 18d3aa3f5b7b6cf3b58215079fab4e2a5c7adae3)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
8 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
9 {
10   PetscErrorCode  ierr;
11   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
12   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
13   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
14   const PetscInt  *ia,*ib;
15   const MatScalar *aa,*bb;
16   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
17   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
18 
19   PetscFunctionBegin;
20   *keptrows = 0;
21   ia = a->i;
22   ib = b->i;
23   for (i=0; i<m; i++) {
24     na = ia[i+1] - ia[i];
25     nb = ib[i+1] - ib[i];
26     if (!na && !nb) {
27       cnt++;
28       goto ok1;
29     }
30     aa = a->a + ia[i];
31     for (j=0; j<na; j++) {
32       if (aa[j] != 0.0) goto ok1;
33     }
34     bb = b->a + ib[i];
35     for (j=0; j <nb; j++) {
36       if (bb[j] != 0.0) goto ok1;
37     }
38     cnt++;
39     ok1:;
40   }
41   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
42   if (!n0rows) PetscFunctionReturn(0);
43   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
44   cnt = 0;
45   for (i=0; i<m; i++) {
46     na = ia[i+1] - ia[i];
47     nb = ib[i+1] - ib[i];
48     if (!na && !nb) continue;
49     aa = a->a + ia[i];
50     for(j=0; j<na;j++) {
51       if (aa[j] != 0.0) {
52         rows[cnt++] = rstart + i;
53         goto ok2;
54       }
55     }
56     bb = b->a + ib[i];
57     for (j=0; j<nb; j++) {
58       if (bb[j] != 0.0) {
59         rows[cnt++] = rstart + i;
60         goto ok2;
61       }
62     }
63     ok2:;
64   }
65   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
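/*
   Usage sketch (hypothetical caller, not part of the library): the routine sets
   keptrows to a NULL IS when every row of M contains a nonzero value; otherwise
   the IS holds the global indices of the locally owned rows that do contain a
   nonzero.

     IS keptrows;
     ierr = MatFindNonZeroRows_MPIAIJ(M,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(keptrows);CHKERRQ(ierr);
     }
*/
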
69 #undef __FUNCT__
70 #define __FUNCT__ "MatDistribute_MPIAIJ"
71 /*
72     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
73     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
74 
75     Only for square matrices
76 */
77 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
78 {
79   PetscMPIInt    rank,size;
80   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
81   PetscErrorCode ierr;
82   Mat            mat;
83   Mat_SeqAIJ     *gmata;
84   PetscMPIInt    tag;
85   MPI_Status     status;
86   PetscBool      aij;
87   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
88 
89   PetscFunctionBegin;
90   CHKMEMQ;
91   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
92   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
93   if (!rank) {
94     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
95     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
96   }
97   if (reuse == MAT_INITIAL_MATRIX) {
98     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
99     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
100     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
101     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
102     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
103     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
104     rowners[0] = 0;
105     for (i=2; i<=size; i++) {
106       rowners[i] += rowners[i-1];
107     }
108     rstart = rowners[rank];
109     rend   = rowners[rank+1];
110     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
111     if (!rank) {
112       gmata = (Mat_SeqAIJ*) gmat->data;
113       /* send row lengths to all processors */
114       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
115       for (i=1; i<size; i++) {
116         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
117       }
118       /* determine the diagonal and off-diagonal nonzero counts */
119       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
120       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
121       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
122       jj = 0;
123       for (i=0; i<m; i++) {
124         for (j=0; j<dlens[i]; j++) {
125           if (gmata->j[jj] < rstart) ld[i]++;
126           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
127           jj++;
128         }
129       }
130       /* send column indices to other processes */
131       for (i=1; i<size; i++) {
132         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
133         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
134         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
135       }
136 
137       /* send numerical values to other processes */
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       gmataa = gmata->a;
143       gmataj = gmata->j;
144 
145     } else {
146       /* receive row lengths */
147       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
148       /* receive column indices */
149       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
150       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
151       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
152       /* determine the diagonal and off-diagonal nonzero counts */
153       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
154       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
155       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
156       jj = 0;
157       for (i=0; i<m; i++) {
158         for (j=0; j<dlens[i]; j++) {
159           if (gmataj[jj] < rstart) ld[i]++;
160           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
161           jj++;
162         }
163       }
164       /* receive numerical values */
165       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
166       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
167     }
168     /* set preallocation */
169     for (i=0; i<m; i++) {
170       dlens[i] -= olens[i];
171     }
172     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
173     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
174 
175     for (i=0; i<m; i++) {
176       dlens[i] += olens[i];
177     }
178     cnt  = 0;
179     for (i=0; i<m; i++) {
180       row  = rstart + i;
181       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
182       cnt += dlens[i];
183     }
184     if (rank) {
185       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
186     }
187     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
188     ierr = PetscFree(rowners);CHKERRQ(ierr);
189     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
190     *inmat = mat;
191   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
192     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
193     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
194     mat   = *inmat;
195     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
196     if (!rank) {
197       /* send numerical values to other processes */
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
200       gmataa = gmata->a;
201       for (i=1; i<size; i++) {
202         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
203         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
204       }
205       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
206     } else {
207       /* receive numerical values from process 0 */
208       nz   = Ad->nz + Ao->nz;
209       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
210       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
211     }
212     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
213     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
214     ad = Ad->a;
215     ao = Ao->a;
216     if (mat->rmap->n) {
217       i  = 0;
218       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
219       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
220     }
221     for (i=1; i<mat->rmap->n; i++) {
222       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
223       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
224     }
225     i--;
226     if (mat->rmap->n) {
227       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
228     }
229     if (rank) {
230       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
231     }
232   }
233   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
235   CHKMEMQ;
236   PetscFunctionReturn(0);
237 }
238 
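/*
   Usage sketch (hypothetical caller, not part of the library): gmat is a square
   MATSEQAIJ that needs to exist (and is only referenced) on process 0, and every
   process passes the number of rows m it is to own; the local sizes must add up
   to the global size of gmat.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);

   If later only the numerical values of gmat change, the distribution can be
   repeated in place with MAT_REUSE_MATRIX:

     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/
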
239 /*
240   Local utility routine that creates a mapping from the global column
241 number to the local number in the off-diagonal part of the local
242 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
243 a slightly higher hash table cost; without it, it is not scalable (each processor
244 has an order-N integer array) but it is fast to access.
245 */
246 #undef __FUNCT__
247 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
248 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
249 {
250   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
251   PetscErrorCode ierr;
252   PetscInt       n = aij->B->cmap->n,i;
253 
254   PetscFunctionBegin;
255 #if defined (PETSC_USE_CTABLE)
256   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
257   for (i=0; i<n; i++){
258     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
259   }
260 #else
261   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
262   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
263   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
264   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
265 #endif
266   PetscFunctionReturn(0);
267 }
268 
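/*
   Lookup sketch: given a global column number gcol that falls in the off-diagonal
   block, the colmap built above is consulted as follows (this mirrors the code in
   MatSetValues_MPIAIJ() below); col ends up as the local column number in aij->B,
   or negative if gcol is not present.  gcol and col are hypothetical names.

     PetscInt col;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/
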
269 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
270 { \
271     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
272     lastcol1 = col;\
273     while (high1-low1 > 5) { \
274       t = (low1+high1)/2; \
275       if (rp1[t] > col) high1 = t; \
276       else             low1  = t; \
277     } \
278       for (_i=low1; _i<high1; _i++) { \
279         if (rp1[_i] > col) break; \
280         if (rp1[_i] == col) { \
281           if (addv == ADD_VALUES) ap1[_i] += value;   \
282           else                    ap1[_i] = value; \
283           goto a_noinsert; \
284         } \
285       }  \
286       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
287       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
288       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
289       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
290       N = nrow1++ - 1; a->nz++; high1++; \
291       /* shift up all the later entries in this row */ \
292       for (ii=N; ii>=_i; ii--) { \
293         rp1[ii+1] = rp1[ii]; \
294         ap1[ii+1] = ap1[ii]; \
295       } \
296       rp1[_i] = col;  \
297       ap1[_i] = value;  \
298       a_noinsert: ; \
299       ailen[row] = nrow1; \
300 }
301 
302 
303 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
304 { \
305     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
306     lastcol2 = col;\
307     while (high2-low2 > 5) { \
308       t = (low2+high2)/2; \
309       if (rp2[t] > col) high2 = t; \
310       else             low2  = t; \
311     } \
312     for (_i=low2; _i<high2; _i++) {		\
313       if (rp2[_i] > col) break;			\
314       if (rp2[_i] == col) {			      \
315 	if (addv == ADD_VALUES) ap2[_i] += value;     \
316 	else                    ap2[_i] = value;      \
317 	goto b_noinsert;			      \
318       }						      \
319     }							      \
320     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
321     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
322     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
323     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
324     N = nrow2++ - 1; b->nz++; high2++;					\
325     /* shift up all the later entries in this row */			\
326     for (ii=N; ii>=_i; ii--) {						\
327       rp2[ii+1] = rp2[ii];						\
328       ap2[ii+1] = ap2[ii];						\
329     }									\
330     rp2[_i] = col;							\
331     ap2[_i] = value;							\
332     b_noinsert: ;								\
333     bilen[row] = nrow2;							\
334 }
335 
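/*
   The two macros above implement the same insertion pattern on the diagonal (A) and
   off-diagonal (B) blocks: narrow the search window by bisection, finish with a
   linear scan, then either update the existing entry or shift the tail of the row
   up and insert.  A simplified standalone sketch of that pattern (hypothetical
   helper; no reallocation and no ignorezeroentries/nonew handling) is

     static void InsertIntoSortedRow(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,
                                     PetscInt col,PetscScalar value,InsertMode addv)
     {
       PetscInt low = 0,high = *nrow,t,i,ii;
       while (high-low > 5) {
         t = (low+high)/2;
         if (cols[t] > col) high = t;
         else               low  = t;
       }
       for (i=low; i<high; i++) {
         if (cols[i] > col) break;
         if (cols[i] == col) {
           if (addv == ADD_VALUES) vals[i] += value;
           else                    vals[i]  = value;
           return;
         }
       }
       for (ii=*nrow-1; ii>=i; ii--) {
         cols[ii+1] = cols[ii];
         vals[ii+1] = vals[ii];
       }
       cols[i] = col;
       vals[i] = value;
       (*nrow)++;
     }
*/
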
336 #undef __FUNCT__
337 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
338 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
339 {
340   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
341   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
342   PetscErrorCode ierr;
343   PetscInt       l,*garray = mat->garray,diag;
344 
345   PetscFunctionBegin;
346   /* code only works for square matrices A */
347 
348   /* find size of row to the left of the diagonal part */
349   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
350   row  = row - diag;
351   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
352     if (garray[b->j[b->i[row]+l]] > diag) break;
353   }
354   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
355 
356   /* diagonal part */
357   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
358 
359   /* right of diagonal part */
360   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
361   PetscFunctionReturn(0);
362 }
363 
364 #undef __FUNCT__
365 #define __FUNCT__ "MatSetValues_MPIAIJ"
366 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
367 {
368   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
369   PetscScalar    value;
370   PetscErrorCode ierr;
371   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
372   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
373   PetscBool      roworiented = aij->roworiented;
374 
375   /* Some variables required in the macros below */
376   Mat            A = aij->A;
377   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
378   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
379   MatScalar      *aa = a->a;
380   PetscBool      ignorezeroentries = a->ignorezeroentries;
381   Mat            B = aij->B;
382   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
383   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
384   MatScalar      *ba = b->a;
385 
386   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
387   PetscInt       nonew = a->nonew;
388   MatScalar      *ap1,*ap2;
389 
390   PetscFunctionBegin;
391   if (v) PetscValidScalarPointer(v,6);
392   for (i=0; i<m; i++) {
393     if (im[i] < 0) continue;
394 #if defined(PETSC_USE_DEBUG)
395     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
396 #endif
397     if (im[i] >= rstart && im[i] < rend) {
398       row      = im[i] - rstart;
399       lastcol1 = -1;
400       rp1      = aj + ai[row];
401       ap1      = aa + ai[row];
402       rmax1    = aimax[row];
403       nrow1    = ailen[row];
404       low1     = 0;
405       high1    = nrow1;
406       lastcol2 = -1;
407       rp2      = bj + bi[row];
408       ap2      = ba + bi[row];
409       rmax2    = bimax[row];
410       nrow2    = bilen[row];
411       low2     = 0;
412       high2    = nrow2;
413 
414       for (j=0; j<n; j++) {
415         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
416         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
417         if (in[j] >= cstart && in[j] < cend){
418           col = in[j] - cstart;
419           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
420         } else if (in[j] < 0) continue;
421 #if defined(PETSC_USE_DEBUG)
422         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
423 #endif
424         else {
425           if (mat->was_assembled) {
426             if (!aij->colmap) {
427               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
428             }
429 #if defined (PETSC_USE_CTABLE)
430             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
431 	    col--;
432 #else
433             col = aij->colmap[in[j]] - 1;
434 #endif
435             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
436               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
437               col =  in[j];
438               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
439               B = aij->B;
440               b = (Mat_SeqAIJ*)B->data;
441               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
442               rp2      = bj + bi[row];
443               ap2      = ba + bi[row];
444               rmax2    = bimax[row];
445               nrow2    = bilen[row];
446               low2     = 0;
447               high2    = nrow2;
448               bm       = aij->B->rmap->n;
449               ba = b->a;
450             }
451           } else col = in[j];
452           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
453         }
454       }
455     } else {
456       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
457       if (!aij->donotstash) {
458         if (roworiented) {
459           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
460         } else {
461           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
462         }
463       }
464     }
465   }
466   PetscFunctionReturn(0);
467 }
468 
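/*
   Caller-side sketch: MatSetValues_MPIAIJ() is reached through the generic
   MatSetValues() interface.  Entries in locally owned rows go directly into the
   A and B blocks above; entries in off-process rows are stashed and communicated
   during assembly, so the usual calling sequence is

     PetscInt    row = 0,cols[2] = {0,1};
     PetscScalar vals[2] = {2.0,-1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
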
469 #undef __FUNCT__
470 #define __FUNCT__ "MatGetValues_MPIAIJ"
471 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
472 {
473   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
474   PetscErrorCode ierr;
475   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
476   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
477 
478   PetscFunctionBegin;
479   for (i=0; i<m; i++) {
480     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
481     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
482     if (idxm[i] >= rstart && idxm[i] < rend) {
483       row = idxm[i] - rstart;
484       for (j=0; j<n; j++) {
485         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
486         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
487         if (idxn[j] >= cstart && idxn[j] < cend){
488           col = idxn[j] - cstart;
489           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
490         } else {
491           if (!aij->colmap) {
492             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
493           }
494 #if defined (PETSC_USE_CTABLE)
495           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
496           col --;
497 #else
498           col = aij->colmap[idxn[j]] - 1;
499 #endif
500           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
501           else {
502             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
503           }
504         }
505       }
506     } else {
507       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
508     }
509   }
510   PetscFunctionReturn(0);
511 }
512 
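/*
   Caller-side sketch: unlike MatSetValues(), MatGetValues_MPIAIJ() only supports
   rows owned by the calling process (see the error at the end of the routine), so
   a caller typically restricts its queries to the local ownership range:

     PetscInt    rstart,rend,row,col = 0;
     PetscScalar val;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/
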
513 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
514 
515 #undef __FUNCT__
516 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
517 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
518 {
519   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
520   PetscErrorCode ierr;
521   PetscInt       nstash,reallocs;
522   InsertMode     addv;
523 
524   PetscFunctionBegin;
525   if (aij->donotstash || mat->nooffprocentries) {
526     PetscFunctionReturn(0);
527   }
528 
529   /* make sure all processors are either in INSERTMODE or ADDMODE */
530   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
531   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
532   mat->insertmode = addv; /* in case this processor had no cache */
533 
534   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
535   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
536   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
542 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
543 {
544   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
545   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
546   PetscErrorCode ierr;
547   PetscMPIInt    n;
548   PetscInt       i,j,rstart,ncols,flg;
549   PetscInt       *row,*col;
550   PetscBool      other_disassembled;
551   PetscScalar    *val;
552   InsertMode     addv = mat->insertmode;
553 
554   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
555   PetscFunctionBegin;
556   if (!aij->donotstash && !mat->nooffprocentries) {
557     while (1) {
558       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
559       if (!flg) break;
560 
561       for (i=0; i<n;) {
562         /* Now identify the consecutive vals belonging to the same row */
563         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
564         if (j < n) ncols = j-i;
565         else       ncols = n-i;
566         /* Now assemble all these values with a single function call */
567         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
568         i = j;
569       }
570     }
571     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
572   }
573   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled; if so, we must
577      also disassemble ourselves so that we may reassemble. */
578   /*
579      if the nonzero structure of submatrix B cannot change then we know that
580      no processor disassembled, and thus we can skip this step
581   */
582   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
589     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
590   }
591   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
592   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
593   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
594   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
595 
596   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
597   aij->rowvalues = 0;
598 
599   /* used by MatAXPY() */
600   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
601   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
602 
603   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
604   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
610 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
611 {
612   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
613   PetscErrorCode ierr;
614 
615   PetscFunctionBegin;
616   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
617   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
618   PetscFunctionReturn(0);
619 }
620 
621 #undef __FUNCT__
622 #define __FUNCT__ "MatZeroRows_MPIAIJ"
623 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
624 {
625   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
626   PetscErrorCode    ierr;
627   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
628   PetscInt          i,*owners = A->rmap->range;
629   PetscInt          *nprocs,j,idx,nsends,row;
630   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
631   PetscInt          *rvalues,count,base,slen,*source;
632   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
633   MPI_Comm          comm = ((PetscObject)A)->comm;
634   MPI_Request       *send_waits,*recv_waits;
635   MPI_Status        recv_status,*send_status;
636   const PetscScalar *xx;
637   PetscScalar       *bb;
638 #if defined(PETSC_DEBUG)
639   PetscBool      found = PETSC_FALSE;
640 #endif
641 
642   PetscFunctionBegin;
643   /*  first count number of contributors to each processor */
644   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
645   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
646   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
647   j = 0;
648   for (i=0; i<N; i++) {
649     if (lastidx > (idx = rows[i])) j = 0;
650     lastidx = idx;
651     for (; j<size; j++) {
652       if (idx >= owners[j] && idx < owners[j+1]) {
653         nprocs[2*j]++;
654         nprocs[2*j+1] = 1;
655         owner[i] = j;
656 #if defined(PETSC_DEBUG)
657         found = PETSC_TRUE;
658 #endif
659         break;
660       }
661     }
662 #if defined(PETSC_DEBUG)
663     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
664     found = PETSC_FALSE;
665 #endif
666   }
667   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
668 
669   if (A->nooffproczerorows) {
670     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
671     nrecvs = nsends;
672     nmax   = N;
673   } else {
674     /* inform other processors of number of messages and max length*/
675     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
676   }
677 
678   /* post receives:   */
679   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
680   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
681   for (i=0; i<nrecvs; i++) {
682     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
683   }
684 
685   /* do sends:
686       1) starts[i] gives the starting index in svalues for stuff going to
687          the ith processor
688   */
689   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
690   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
691   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
692   starts[0] = 0;
693   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
694   for (i=0; i<N; i++) {
695     svalues[starts[owner[i]]++] = rows[i];
696   }
697 
698   starts[0] = 0;
699   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
700   count = 0;
701   for (i=0; i<size; i++) {
702     if (nprocs[2*i+1]) {
703       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
704     }
705   }
706   ierr = PetscFree(starts);CHKERRQ(ierr);
707 
708   base = owners[rank];
709 
710   /*  wait on receives */
711   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
712   count  = nrecvs; slen = 0;
713   while (count) {
714     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
715     /* unpack receives into our local space */
716     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
717     source[imdex]  = recv_status.MPI_SOURCE;
718     lens[imdex]    = n;
719     slen          += n;
720     count--;
721   }
722   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
723 
724   /* move the data into the send scatter */
725   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
726   count = 0;
727   for (i=0; i<nrecvs; i++) {
728     values = rvalues + i*nmax;
729     for (j=0; j<lens[i]; j++) {
730       lrows[count++] = values[j] - base;
731     }
732   }
733   ierr = PetscFree(rvalues);CHKERRQ(ierr);
734   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
735   ierr = PetscFree(owner);CHKERRQ(ierr);
736   ierr = PetscFree(nprocs);CHKERRQ(ierr);
737 
738   /* fix right hand side if needed */
739   if (x && b) {
740     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
741     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
742     for (i=0; i<slen; i++) {
743       bb[lrows[i]] = diag*xx[lrows[i]];
744     }
745     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
746     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
747   }
748   /*
749      Zero the required rows. If the "diagonal block" of the matrix
750      is square and the user wishes to set the diagonal, we use separate
751      code so that MatSetValues() is not called for each diagonal entry,
752      which would allocate new memory with many mallocs and slow things down.
753 
754   */
755   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
756   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
757   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
758     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
759   } else if (diag != 0.0) {
760     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
761     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
762       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
763 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
764     }
765     for (i = 0; i < slen; i++) {
766       row  = lrows[i] + rstart;
767       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
768     }
769     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
770     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
771   } else {
772     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
773   }
774   ierr = PetscFree(lrows);CHKERRQ(ierr);
775 
776   /* wait on sends */
777   if (nsends) {
778     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
779     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
780     ierr = PetscFree(send_status);CHKERRQ(ierr);
781   }
782   ierr = PetscFree(send_waits);CHKERRQ(ierr);
783   ierr = PetscFree(svalues);CHKERRQ(ierr);
784 
785   PetscFunctionReturn(0);
786 }
787 
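/*
   Caller-side sketch (typical Dirichlet boundary condition use, assuming rows[]
   holds the global indices of the nrows boundary rows known to this process):

     ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);

   With a nonzero diag the zeroed rows keep that value on the diagonal, and b is
   set to diag*x in those rows (see the "fix right hand side" block above), so a
   subsequent solve reproduces the boundary values stored in x.
*/
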
788 #undef __FUNCT__
789 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
790 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
791 {
792   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
793   PetscErrorCode    ierr;
794   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
795   PetscInt          i,*owners = A->rmap->range;
796   PetscInt          *nprocs,j,idx,nsends;
797   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
798   PetscInt          *rvalues,count,base,slen,*source;
799   PetscInt          *lens,*lrows,*values,m;
800   MPI_Comm          comm = ((PetscObject)A)->comm;
801   MPI_Request       *send_waits,*recv_waits;
802   MPI_Status        recv_status,*send_status;
803   const PetscScalar *xx;
804   PetscScalar       *bb,*mask;
805   Vec               xmask,lmask;
806   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
807   const PetscInt    *aj, *ii,*ridx;
808   PetscScalar       *aa;
809 #if defined(PETSC_DEBUG)
810   PetscBool         found = PETSC_FALSE;
811 #endif
812 
813   PetscFunctionBegin;
814   /*  first count number of contributors to each processor */
815   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
816   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
817   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
818   j = 0;
819   for (i=0; i<N; i++) {
820     if (lastidx > (idx = rows[i])) j = 0;
821     lastidx = idx;
822     for (; j<size; j++) {
823       if (idx >= owners[j] && idx < owners[j+1]) {
824         nprocs[2*j]++;
825         nprocs[2*j+1] = 1;
826         owner[i] = j;
827 #if defined(PETSC_DEBUG)
828         found = PETSC_TRUE;
829 #endif
830         break;
831       }
832     }
833 #if defined(PETSC_DEBUG)
834     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
835     found = PETSC_FALSE;
836 #endif
837   }
838   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
839 
840   /* inform other processors of number of messages and max length*/
841   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
842 
843   /* post receives:   */
844   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
845   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
846   for (i=0; i<nrecvs; i++) {
847     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
848   }
849 
850   /* do sends:
851       1) starts[i] gives the starting index in svalues for stuff going to
852          the ith processor
853   */
854   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
855   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
856   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
857   starts[0] = 0;
858   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
859   for (i=0; i<N; i++) {
860     svalues[starts[owner[i]]++] = rows[i];
861   }
862 
863   starts[0] = 0;
864   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
865   count = 0;
866   for (i=0; i<size; i++) {
867     if (nprocs[2*i+1]) {
868       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
869     }
870   }
871   ierr = PetscFree(starts);CHKERRQ(ierr);
872 
873   base = owners[rank];
874 
875   /*  wait on receives */
876   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
877   count  = nrecvs; slen = 0;
878   while (count) {
879     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
880     /* unpack receives into our local space */
881     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
882     source[imdex]  = recv_status.MPI_SOURCE;
883     lens[imdex]    = n;
884     slen          += n;
885     count--;
886   }
887   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
888 
889   /* move the data into the send scatter */
890   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
891   count = 0;
892   for (i=0; i<nrecvs; i++) {
893     values = rvalues + i*nmax;
894     for (j=0; j<lens[i]; j++) {
895       lrows[count++] = values[j] - base;
896     }
897   }
898   ierr = PetscFree(rvalues);CHKERRQ(ierr);
899   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
900   ierr = PetscFree(owner);CHKERRQ(ierr);
901   ierr = PetscFree(nprocs);CHKERRQ(ierr);
902   /* lrows are the local rows to be zeroed, slen is the number of local rows */
903 
904   /* zero diagonal part of matrix */
905   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
906 
907   /* handle off diagonal part of matrix */
908   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
909   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
910   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
911   for (i=0; i<slen; i++) {
912     bb[lrows[i]] = 1;
913   }
914   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
915   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   ierr = VecDestroy(xmask);CHKERRQ(ierr);
918   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
923 
924   /* remove zeroed rows of off diagonal matrix */
925   ii = aij->i;
926   for (i=0; i<slen; i++) {
927     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
928   }
929 
930   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
931   if (aij->compressedrow.use){
932     m    = aij->compressedrow.nrows;
933     ii   = aij->compressedrow.i;
934     ridx = aij->compressedrow.rindex;
935     for (i=0; i<m; i++){
936       n   = ii[i+1] - ii[i];
937       aj  = aij->j + ii[i];
938       aa  = aij->a + ii[i];
939 
940       for (j=0; j<n; j++) {
941         if (PetscAbsScalar(mask[*aj])) {
942           bb[*ridx] -= *aa*xx[*aj];
943           *aa        = 0.0;
944         }
945         aa++;
946         aj++;
947       }
948       ridx++;
949     }
950   } else { /* do not use compressed row format */
951     m = l->B->rmap->n;
952     for (i=0; i<m; i++) {
953       n   = ii[i+1] - ii[i];
954       aj  = aij->j + ii[i];
955       aa  = aij->a + ii[i];
956       for (j=0; j<n; j++) {
957         if (PetscAbsScalar(mask[*aj])) {
958           bb[i] -= *aa*xx[*aj];
959           *aa    = 0.0;
960         }
961         aa++;
962         aj++;
963       }
964     }
965   }
966 
967   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
968   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
969   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
970   ierr = VecDestroy(lmask);CHKERRQ(ierr);
971   ierr = PetscFree(lrows);CHKERRQ(ierr);
972 
973   /* wait on sends */
974   if (nsends) {
975     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
976     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
977     ierr = PetscFree(send_status);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(send_waits);CHKERRQ(ierr);
980   ierr = PetscFree(svalues);CHKERRQ(ierr);
981 
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatMult_MPIAIJ"
987 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
988 {
989   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
990   PetscErrorCode ierr;
991   PetscInt       nt;
992 
993   PetscFunctionBegin;
994   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
995   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
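  /* start communication of the needed off-process entries of xx into a->lvec; the
     local (diagonal block) multiply below overlaps with this communication */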
996   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
998   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
999   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1005 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1006 {
1007   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1017 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1018 {
1019   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1020   PetscErrorCode ierr;
1021 
1022   PetscFunctionBegin;
1023   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1024   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1025   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1032 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1033 {
1034   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1035   PetscErrorCode ierr;
1036   PetscBool      merged;
1037 
1038   PetscFunctionBegin;
1039   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1040   /* do nondiagonal part */
1041   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1042   if (!merged) {
1043     /* send it on its way */
1044     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045     /* do local part */
1046     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1047     /* receive remote parts: note this assumes the values are not actually */
1048     /* added into yy until the VecScatterEnd() on the next line */
1049     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1050   } else {
1051     /* do local part */
1052     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1053     /* send it on its way */
1054     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1055     /* values actually were received in the Begin() but we need to call this nop */
1056     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 EXTERN_C_BEGIN
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1064 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1065 {
1066   MPI_Comm       comm;
1067   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1068   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1069   IS             Me,Notme;
1070   PetscErrorCode ierr;
1071   PetscInt       M,N,first,last,*notme,i;
1072   PetscMPIInt    size;
1073 
1074   PetscFunctionBegin;
1075 
1076   /* Easy test: symmetric diagonal block */
1077   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1078   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1079   if (!*f) PetscFunctionReturn(0);
1080   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1081   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1082   if (size == 1) PetscFunctionReturn(0);
1083 
1084   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1085   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1086   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1087   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1088   for (i=0; i<first; i++) notme[i] = i;
1089   for (i=last; i<M; i++) notme[i-last+first] = i;
1090   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1091   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1092   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1093   Aoff = Aoffs[0];
1094   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1095   Boff = Boffs[0];
1096   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1097   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1098   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1099   ierr = ISDestroy(Me);CHKERRQ(ierr);
1100   ierr = ISDestroy(Notme);CHKERRQ(ierr);
1101 
1102   PetscFunctionReturn(0);
1103 }
1104 EXTERN_C_END
1105 
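/*
   Caller-side sketch: this routine is reached through MatIsTranspose() when both
   matrices are MPIAIJ.

     PetscBool flg;
     ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
     if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"B is not the transpose of A");
*/
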
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1108 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   /* do nondiagonal part */
1115   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1116   /* send it on its way */
1117   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1118   /* do local part */
1119   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1120   /* receive remote parts */
1121   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 /*
1126   This only works correctly for square matrices where the subblock A->A is the
1127    diagonal block
1128 */
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1131 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1132 {
1133   PetscErrorCode ierr;
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135 
1136   PetscFunctionBegin;
1137   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1138   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1139   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatScale_MPIAIJ"
1145 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1152   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatDestroy_MPIAIJ"
1158 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1159 {
1160   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164 #if defined(PETSC_USE_LOG)
1165   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1166 #endif
1167   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1168   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
1169   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
1170   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
1171 #if defined (PETSC_USE_CTABLE)
1172   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
1173 #else
1174   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1175 #endif
1176   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1177   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
1178   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
1179   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1180   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1181   ierr = PetscFree(aij);CHKERRQ(ierr);
1182 
1183   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1191   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1197 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1198 {
1199   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1200   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1201   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1202   PetscErrorCode    ierr;
1203   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1204   int               fd;
1205   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1206   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1207   PetscScalar       *column_values;
1208   PetscInt          message_count,flowcontrolcount;
1209 
1210   PetscFunctionBegin;
1211   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1212   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1213   nz   = A->nz + B->nz;
1214   if (!rank) {
1215     header[0] = MAT_FILE_CLASSID;
1216     header[1] = mat->rmap->N;
1217     header[2] = mat->cmap->N;
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1219     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1220     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1221     /* get largest number of rows any processor has */
1222     rlen = mat->rmap->n;
1223     range = mat->rmap->range;
1224     for (i=1; i<size; i++) {
1225       rlen = PetscMax(rlen,range[i+1] - range[i]);
1226     }
1227   } else {
1228     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1229     rlen = mat->rmap->n;
1230   }
1231 
1232   /* load up the local row counts */
1233   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1234   for (i=0; i<mat->rmap->n; i++) {
1235     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1236   }
1237 
1238   /* store the row lengths to the file */
1239   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1240   if (!rank) {
1241     MPI_Status status;
1242     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1243     for (i=1; i<size; i++) {
1244       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1245       rlen = range[i+1] - range[i];
1246       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1247       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1248     }
1249     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1250   } else {
1251     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1252     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1253     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1254   }
1255   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1256 
1257   /* load up the local column indices */
1258   nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
1259   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1260   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1261   cnt  = 0;
1262   for (i=0; i<mat->rmap->n; i++) {
1263     for (j=B->i[i]; j<B->i[i+1]; j++) {
1264       if ( (col = garray[B->j[j]]) > cstart) break;
1265       column_indices[cnt++] = col;
1266     }
1267     for (k=A->i[i]; k<A->i[i+1]; k++) {
1268       column_indices[cnt++] = A->j[k] + cstart;
1269     }
1270     for (; j<B->i[i+1]; j++) {
1271       column_indices[cnt++] = garray[B->j[j]];
1272     }
1273   }
1274   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1275 
1276   /* store the column indices to the file */
1277   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1278   if (!rank) {
1279     MPI_Status status;
1280     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     for (i=1; i<size; i++) {
1282       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1283       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1284       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1285       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1286       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1287     }
1288     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1289   } else {
1290     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1291     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1292     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1293     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1294   }
1295   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1296 
1297   /* load up the local column values */
1298   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1299   cnt  = 0;
1300   for (i=0; i<mat->rmap->n; i++) {
1301     for (j=B->i[i]; j<B->i[i+1]; j++) {
1302       if ( garray[B->j[j]] > cstart) break;
1303       column_values[cnt++] = B->a[j];
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) {
1306       column_values[cnt++] = A->a[k];
1307     }
1308     for (; j<B->i[i+1]; j++) {
1309       column_values[cnt++] = B->a[j];
1310     }
1311   }
1312   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1313 
1314   /* store the column values to the file */
1315   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1316   if (!rank) {
1317     MPI_Status status;
1318     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1319     for (i=1; i<size; i++) {
1320       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1321       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1322       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1323       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1324       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1325     }
1326     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1327   } else {
1328     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1329     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1331     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1332   }
1333   ierr = PetscFree(column_values);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1339 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1340 {
1341   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1342   PetscErrorCode    ierr;
1343   PetscMPIInt       rank = aij->rank,size = aij->size;
1344   PetscBool         isdraw,iascii,isbinary;
1345   PetscViewer       sviewer;
1346   PetscViewerFormat format;
1347 
1348   PetscFunctionBegin;
1349   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1350   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1351   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1352   if (iascii) {
1353     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1354     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1355       MatInfo    info;
1356       PetscBool  inodes;
1357 
1358       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1359       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1361       if (!inodes) {
1362         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1363 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1364       } else {
1365         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1366 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1367       }
1368       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1369       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1370       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1371       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1372       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1373       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1374       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1375       PetscFunctionReturn(0);
1376     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1377       PetscInt   inodecount,inodelimit,*inodes;
1378       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1379       if (inodes) {
1380         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1381       } else {
1382         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1383       }
1384       PetscFunctionReturn(0);
1385     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1386       PetscFunctionReturn(0);
1387     }
1388   } else if (isbinary) {
1389     if (size == 1) {
1390       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1391       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1392     } else {
1393       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1394     }
1395     PetscFunctionReturn(0);
1396   } else if (isdraw) {
1397     PetscDraw  draw;
1398     PetscBool  isnull;
1399     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1400     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1401   }
1402 
1403   if (size == 1) {
1404     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1405     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1406   } else {
1407     /* assemble the entire matrix onto first processor. */
1408     Mat         A;
1409     Mat_SeqAIJ  *Aloc;
1410     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1411     MatScalar   *a;
1412 
1413     if (mat->rmap->N > 1024) {
1414       PetscBool  flg = PETSC_FALSE;
1415 
1416       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1417       if (!flg) {
1418         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1419       }
1420     }
1421 
1422     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1423     if (!rank) {
1424       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1425     } else {
1426       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1427     }
1428     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1429     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1430     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1431     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1432 
1433     /* copy over the A part */
1434     Aloc = (Mat_SeqAIJ*)aij->A->data;
1435     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1436     row = mat->rmap->rstart;
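         /* temporarily shift the diagonal-block column indices to global numbering
            for MatSetValues(), then shift them back below so mat is left unchanged */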
1437     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1438     for (i=0; i<m; i++) {
1439       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1440       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1441     }
1442     aj = Aloc->j;
1443     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1444 
1445     /* copy over the B part */
1446     Aloc = (Mat_SeqAIJ*)aij->B->data;
1447     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1448     row  = mat->rmap->rstart;
1449     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1450     ct   = cols;
1451     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1452     for (i=0; i<m; i++) {
1453       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1454       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1455     }
1456     ierr = PetscFree(ct);CHKERRQ(ierr);
1457     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1458     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1459     /*
1460        Everyone has to call to draw the matrix since the graphics waits are
1461        synchronized across all processors that share the PetscDraw object
1462     */
1463     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1464     if (!rank) {
1465       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1466       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1467       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1468       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1469     }
1470     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1471     ierr = MatDestroy(A);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatView_MPIAIJ"
1478 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1479 {
1480   PetscErrorCode ierr;
1481   PetscBool      iascii,isdraw,issocket,isbinary;
1482 
1483   PetscFunctionBegin;
1484   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1485   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1486   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1487   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1488   if (iascii || isdraw || isbinary || issocket) {
1489     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1490   } else {
1491     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1492   }
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatSOR_MPIAIJ"
1498 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1499 {
1500   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1501   PetscErrorCode ierr;
1502   Vec            bb1 = 0;
1503   PetscBool      hasop;
1504 
1505   PetscFunctionBegin;
1506   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1507     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1508   }
1509 
1510   if (flag == SOR_APPLY_UPPER) {
1511     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1512     PetscFunctionReturn(0);
1513   }
1514 
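       /* the local sweeps below handle the off-process coupling explicitly: each
          outer iteration scatters the current solution into the ghost vector lvec
          and folds it into the right-hand side as bb1 = bb - B*lvec before running
          a purely local SOR sweep on the diagonal block A */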
1515   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1516     if (flag & SOR_ZERO_INITIAL_GUESS) {
1517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1518       its--;
1519     }
1520 
1521     while (its--) {
1522       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524 
1525       /* update rhs: bb1 = bb - B*x */
1526       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1527       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1528 
1529       /* local sweep */
1530       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1531     }
1532   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1533     if (flag & SOR_ZERO_INITIAL_GUESS) {
1534       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1535       its--;
1536     }
1537     while (its--) {
1538       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1540 
1541       /* update rhs: bb1 = bb - B*x */
1542       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1543       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1544 
1545       /* local sweep */
1546       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1547     }
1548   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1549     if (flag & SOR_ZERO_INITIAL_GUESS) {
1550       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1551       its--;
1552     }
1553     while (its--) {
1554       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556 
1557       /* update rhs: bb1 = bb - B*x */
1558       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1559       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1560 
1561       /* local sweep */
1562       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1563     }
1564   }  else if (flag & SOR_EISENSTAT) {
1565     Vec         xx1;
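         /* Eisenstat's trick as implemented here: a local backward sweep produces xx,
            the right-hand side is rebuilt from the diagonal and the off-process part
            (bb1 = ((omega-2)/omega)*D*xx + bb + B*lvec), a local forward sweep then
            yields xx1, and the two partial results are summed into xx */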
1566 
1567     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1568     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1569 
1570     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1571     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572     if (!mat->diag) {
1573       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1574       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1575     }
1576     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1577     if (hasop) {
1578       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1579     } else {
1580       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1581     }
1582     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1583 
1584     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1585 
1586     /* local sweep */
1587     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1588     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1589     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1590   } else {
1591     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1592   }
1593 
1594   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatPermute_MPIAIJ"
1600 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1601 {
1602   MPI_Comm       comm,pcomm;
1603   PetscInt       first,local_size,nrows;
1604   const PetscInt *rows;
1605   PetscMPIInt    size;
1606   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1607   PetscErrorCode ierr;
1608 
1609   PetscFunctionBegin;
1610   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1611   /* make a collective version of 'rowp' */
1612   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1613   if (pcomm==comm) {
1614     crowp = rowp;
1615   } else {
1616     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1617     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1618     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1619     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1620   }
1621   /* collect the global row permutation and invert it */
1622   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1623   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1624   if (pcomm!=comm) {
1625     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1626   }
1627   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1628   /* get the local target indices */
1629   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1630   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1631   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1632   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1633   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1634   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1635   /* the column permutation is so much easier;
1636      make a local version of 'colp' and invert it */
1637   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1638   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1639   if (size==1) {
1640     lcolp = colp;
1641   } else {
1642     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1643     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1644     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1645   }
1646   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1647   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1648   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1649   if (size>1) {
1650     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1651     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1652   }
1653   /* now we just get the submatrix */
1654   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1655   /* clean up */
1656   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1657   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1663 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1664 {
1665   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1666   Mat            A = mat->A,B = mat->B;
1667   PetscErrorCode ierr;
1668   PetscReal      isend[5],irecv[5];
1669 
1670   PetscFunctionBegin;
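       /* accumulate the statistics of the diagonal (A) and off-diagonal (B) blocks,
          then combine them across ranks with MPI_MAX or MPI_SUM as requested, or
          return the local totals for MAT_LOCAL */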
1671   info->block_size     = 1.0;
1672   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1673   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1674   isend[3] = info->memory;  isend[4] = info->mallocs;
1675   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1676   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1677   isend[3] += info->memory;  isend[4] += info->mallocs;
1678   if (flag == MAT_LOCAL) {
1679     info->nz_used      = isend[0];
1680     info->nz_allocated = isend[1];
1681     info->nz_unneeded  = isend[2];
1682     info->memory       = isend[3];
1683     info->mallocs      = isend[4];
1684   } else if (flag == MAT_GLOBAL_MAX) {
1685     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1686     info->nz_used      = irecv[0];
1687     info->nz_allocated = irecv[1];
1688     info->nz_unneeded  = irecv[2];
1689     info->memory       = irecv[3];
1690     info->mallocs      = irecv[4];
1691   } else if (flag == MAT_GLOBAL_SUM) {
1692     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1693     info->nz_used      = irecv[0];
1694     info->nz_allocated = irecv[1];
1695     info->nz_unneeded  = irecv[2];
1696     info->memory       = irecv[3];
1697     info->mallocs      = irecv[4];
1698   }
1699   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1700   info->fill_ratio_needed = 0;
1701   info->factor_mallocs    = 0;
1702 
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatSetOption_MPIAIJ"
1708 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1709 {
1710   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1711   PetscErrorCode ierr;
1712 
1713   PetscFunctionBegin;
1714   switch (op) {
1715   case MAT_NEW_NONZERO_LOCATIONS:
1716   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1717   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1718   case MAT_KEEP_NONZERO_PATTERN:
1719   case MAT_NEW_NONZERO_LOCATION_ERR:
1720   case MAT_USE_INODES:
1721   case MAT_IGNORE_ZERO_ENTRIES:
1722     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1723     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1724     break;
1725   case MAT_ROW_ORIENTED:
1726     a->roworiented = flg;
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1729     break;
1730   case MAT_NEW_DIAGONALS:
1731     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1732     break;
1733   case MAT_IGNORE_OFF_PROC_ENTRIES:
1734     a->donotstash = PETSC_TRUE;
1735     break;
1736   case MAT_SPD:
1737     A->spd_set                         = PETSC_TRUE;
1738     A->spd                             = flg;
1739     if (flg) {
1740       A->symmetric                     = PETSC_TRUE;
1741       A->structurally_symmetric        = PETSC_TRUE;
1742       A->symmetric_set                 = PETSC_TRUE;
1743       A->structurally_symmetric_set    = PETSC_TRUE;
1744     }
1745     break;
1746   case MAT_SYMMETRIC:
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     break;
1749   case MAT_STRUCTURALLY_SYMMETRIC:
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     break;
1752   case MAT_HERMITIAN:
1753     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1754     break;
1755   case MAT_SYMMETRY_ETERNAL:
1756     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1757     break;
1758   default:
1759     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1760   }
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatGetRow_MPIAIJ"
1766 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1767 {
1768   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1769   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1770   PetscErrorCode ierr;
1771   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1772   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1773   PetscInt       *cmap,*idx_p;
1774 
1775   PetscFunctionBegin;
1776   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1777   mat->getrowactive = PETSC_TRUE;
1778 
1779   if (!mat->rowvalues && (idx || v)) {
1780     /*
1781         allocate enough space to hold information from the longest row.
1782     */
1783     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1784     PetscInt   max = 1,tmp;
1785     for (i=0; i<matin->rmap->n; i++) {
1786       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1787       if (max < tmp) { max = tmp; }
1788     }
1789     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1790   }
1791 
1792   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1793   lrow = row - rstart;
1794 
1795   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1796   if (!v)   {pvA = 0; pvB = 0;}
1797   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1798   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1799   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1800   nztot = nzA + nzB;
1801 
1802   cmap  = mat->garray;
1803   if (v  || idx) {
1804     if (nztot) {
1805       /* Sort by increasing column numbers, assuming A and B already sorted */
1806       PetscInt imark = -1;
1807       if (v) {
1808         *v = v_p = mat->rowvalues;
1809         for (i=0; i<nzB; i++) {
1810           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1811           else break;
1812         }
1813         imark = i;
1814         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1815         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1816       }
1817       if (idx) {
1818         *idx = idx_p = mat->rowindices;
1819         if (imark > -1) {
1820           for (i=0; i<imark; i++) {
1821             idx_p[i] = cmap[cworkB[i]];
1822           }
1823         } else {
1824           for (i=0; i<nzB; i++) {
1825             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1826             else break;
1827           }
1828           imark = i;
1829         }
1830         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1831         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1832       }
1833     } else {
1834       if (idx) *idx = 0;
1835       if (v)   *v   = 0;
1836     }
1837   }
1838   *nz = nztot;
1839   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1840   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1846 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1847 {
1848   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1849 
1850   PetscFunctionBegin;
1851   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1852   aij->getrowactive = PETSC_FALSE;
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatNorm_MPIAIJ"
1858 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1859 {
1860   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1861   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1862   PetscErrorCode ierr;
1863   PetscInt       i,j,cstart = mat->cmap->rstart;
1864   PetscReal      sum = 0.0;
1865   MatScalar      *v;
1866 
1867   PetscFunctionBegin;
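       /* each rank accumulates its contribution from the A and B blocks; a single
          MPI_Allreduce combines them (SUM for the Frobenius norm and the 1-norm
          column sums, MAX for the infinity-norm row sums) */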
1868   if (aij->size == 1) {
1869     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1870   } else {
1871     if (type == NORM_FROBENIUS) {
1872       v = amat->a;
1873       for (i=0; i<amat->nz; i++) {
1874 #if defined(PETSC_USE_COMPLEX)
1875         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1876 #else
1877         sum += (*v)*(*v); v++;
1878 #endif
1879       }
1880       v = bmat->a;
1881       for (i=0; i<bmat->nz; i++) {
1882 #if defined(PETSC_USE_COMPLEX)
1883         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1884 #else
1885         sum += (*v)*(*v); v++;
1886 #endif
1887       }
1888       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1889       *norm = sqrt(*norm);
1890     } else if (type == NORM_1) { /* max column norm */
1891       PetscReal *tmp,*tmp2;
1892       PetscInt  *jj,*garray = aij->garray;
1893       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1894       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1895       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1896       *norm = 0.0;
1897       v = amat->a; jj = amat->j;
1898       for (j=0; j<amat->nz; j++) {
1899         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1900       }
1901       v = bmat->a; jj = bmat->j;
1902       for (j=0; j<bmat->nz; j++) {
1903         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1904       }
1905       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1906       for (j=0; j<mat->cmap->N; j++) {
1907         if (tmp2[j] > *norm) *norm = tmp2[j];
1908       }
1909       ierr = PetscFree(tmp);CHKERRQ(ierr);
1910       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1911     } else if (type == NORM_INFINITY) { /* max row norm */
1912       PetscReal ntemp = 0.0;
1913       for (j=0; j<aij->A->rmap->n; j++) {
1914         v = amat->a + amat->i[j];
1915         sum = 0.0;
1916         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1917           sum += PetscAbsScalar(*v); v++;
1918         }
1919         v = bmat->a + bmat->i[j];
1920         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1921           sum += PetscAbsScalar(*v); v++;
1922         }
1923         if (sum > ntemp) ntemp = sum;
1924       }
1925       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1926     } else {
1927       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1928     }
1929   }
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatTranspose_MPIAIJ"
1935 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1936 {
1937   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1938   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1939   PetscErrorCode ierr;
1940   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1941   PetscInt       cstart=A->cmap->rstart,ncol;
1942   Mat            B;
1943   MatScalar      *array;
1944 
1945   PetscFunctionBegin;
1946   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1947 
1948   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1949   ai = Aloc->i; aj = Aloc->j;
1950   bi = Bloc->i; bj = Bloc->j;
1951   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1952     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1953     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1954     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1955     for (i=0; i<ai[ma]; i++){
1956       d_nnz[aj[i]] ++;
1957       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1958     }
1959 
1960     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1961     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1962     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1963     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1964     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1965   } else {
1966     B = *matout;
1967   }
1968 
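       /* the transpose is assembled by inserting every local row of A (and of B,
          mapped to global columns through garray) as a single column of the new
          matrix: MatSetValues() is called with ncol row indices and one column */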
1969   /* copy over the A part */
1970   array = Aloc->a;
1971   row = A->rmap->rstart;
1972   for (i=0; i<ma; i++) {
1973     ncol = ai[i+1]-ai[i];
1974     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1975     row++; array += ncol; aj += ncol;
1976   }
1977   aj = Aloc->j;
1978   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1979 
1980   /* copy over the B part */
1981   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1982   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1983   array = Bloc->a;
1984   row = A->rmap->rstart;
1985   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1986   cols_tmp = cols;
1987   for (i=0; i<mb; i++) {
1988     ncol = bi[i+1]-bi[i];
1989     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1990     row++; array += ncol; cols_tmp += ncol;
1991   }
1992   ierr = PetscFree(cols);CHKERRQ(ierr);
1993 
1994   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1995   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1996   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1997     *matout = B;
1998   } else {
1999     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2000   }
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2006 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2007 {
2008   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2009   Mat            a = aij->A,b = aij->B;
2010   PetscErrorCode ierr;
2011   PetscInt       s1,s2,s3;
2012 
2013   PetscFunctionBegin;
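       /* left scaling (ll) acts on locally owned rows, so both blocks can be scaled
          directly; right scaling (rr) touches columns owned by other ranks, so rr is
          scattered into lvec before the off-diagonal block is scaled on the right */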
2014   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2015   if (rr) {
2016     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2017     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2018     /* Overlap communication with computation. */
2019     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2020   }
2021   if (ll) {
2022     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2023     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2024     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2025   }
2026   /* scale  the diagonal block */
2027   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2028 
2029   if (rr) {
2030     /* Do a scatter end and then right scale the off-diagonal block */
2031     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2032     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2033   }
2034 
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2040 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2041 {
2042   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2043   PetscErrorCode ierr;
2044 
2045   PetscFunctionBegin;
2046   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2047   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2048   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2049   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2054 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2055 {
2056   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2057   PetscErrorCode ierr;
2058 
2059   PetscFunctionBegin;
2060   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatEqual_MPIAIJ"
2066 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2067 {
2068   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2069   Mat            a,b,c,d;
2070   PetscBool      flg;
2071   PetscErrorCode ierr;
2072 
2073   PetscFunctionBegin;
2074   a = matA->A; b = matA->B;
2075   c = matB->A; d = matB->B;
2076 
2077   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2078   if (flg) {
2079     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2080   }
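       /* all ranks must agree, so the local comparisons are combined with a logical AND */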
2081   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatCopy_MPIAIJ"
2087 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2088 {
2089   PetscErrorCode ierr;
2090   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2091   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2092 
2093   PetscFunctionBegin;
2094   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2095   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2096     /* because of the column compression in the off-processor part of the matrix a->B,
2097        the number of columns in a->B and b->B may be different, hence we cannot call
2098        the MatCopy() directly on the two parts. If need be, we can provide a more
2099        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2100        then copying the submatrices */
2101     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2102   } else {
2103     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2104     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2105   }
2106   PetscFunctionReturn(0);
2107 }
2108 
2109 #undef __FUNCT__
2110 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2111 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2112 {
2113   PetscErrorCode ierr;
2114 
2115   PetscFunctionBegin;
2116   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 #undef __FUNCT__
2121 #define __FUNCT__ "MatAXPY_MPIAIJ"
2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2123 {
2124   PetscErrorCode ierr;
2125   PetscInt       i;
2126   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2127   PetscBLASInt   bnz,one=1;
2128   Mat_SeqAIJ     *x,*y;
2129 
2130   PetscFunctionBegin;
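       /* three cases: identical nonzero patterns allow a direct BLAS axpy on the stored
          value arrays; a subset pattern defers the diagonal block to MatAXPY_SeqAIJ()
          and maps X's off-diagonal entries into Y through the cached xtoy index array;
          otherwise a freshly preallocated matrix is built and filled with
          MatAXPY_BasicWithPreallocation() before replacing Y */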
2131   if (str == SAME_NONZERO_PATTERN) {
2132     PetscScalar alpha = a;
2133     x = (Mat_SeqAIJ *)xx->A->data;
2134     y = (Mat_SeqAIJ *)yy->A->data;
2135     bnz = PetscBLASIntCast(x->nz);
2136     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2137     x = (Mat_SeqAIJ *)xx->B->data;
2138     y = (Mat_SeqAIJ *)yy->B->data;
2139     bnz = PetscBLASIntCast(x->nz);
2140     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2141   } else if (str == SUBSET_NONZERO_PATTERN) {
2142     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2143 
2144     x = (Mat_SeqAIJ *)xx->B->data;
2145     y = (Mat_SeqAIJ *)yy->B->data;
2146     if (y->xtoy && y->XtoY != xx->B) {
2147       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2148       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2149     }
2150     if (!y->xtoy) { /* get xtoy */
2151       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2152       y->XtoY = xx->B;
2153       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2154     }
2155     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2156   } else {
2157     Mat B;
2158     PetscInt *nnz_d,*nnz_o;
2159     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2160     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2161     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2162     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2163     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2164     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2165     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2166     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2167     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2168     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2169     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2170     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2171     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2172   }
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2177 
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatConjugate_MPIAIJ"
2180 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2181 {
2182 #if defined(PETSC_USE_COMPLEX)
2183   PetscErrorCode ierr;
2184   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2185 
2186   PetscFunctionBegin;
2187   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2188   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2189 #else
2190   PetscFunctionBegin;
2191 #endif
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatRealPart_MPIAIJ"
2197 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2198 {
2199   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2204   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2210 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2211 {
2212   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2213   PetscErrorCode ierr;
2214 
2215   PetscFunctionBegin;
2216   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2217   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #ifdef PETSC_HAVE_PBGL
2222 
2223 #include <boost/parallel/mpi/bsp_process_group.hpp>
2224 #include <boost/graph/distributed/ilu_default_graph.hpp>
2225 #include <boost/graph/distributed/ilu_0_block.hpp>
2226 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2227 #include <boost/graph/distributed/petsc/interface.hpp>
2228 #include <boost/multi_array.hpp>
2229 #include <boost/parallel/distributed_property_map.hpp>
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2233 /*
2234   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2235 */
2236 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2237 {
2238   namespace petsc = boost::distributed::petsc;
2239 
2240   namespace graph_dist = boost::graph::distributed;
2241   using boost::graph::distributed::ilu_default::process_group_type;
2242   using boost::graph::ilu_permuted;
2243 
2244   PetscBool       row_identity, col_identity;
2245   PetscContainer  c;
2246   PetscInt        m, n, M, N;
2247   PetscErrorCode  ierr;
2248 
2249   PetscFunctionBegin;
2250   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2251   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2252   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2253   if (!row_identity || !col_identity) {
2254     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2255   }
2256 
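       /* the matrix is copied into a distributed Boost graph, ilu_permuted() computes
          the level-0 ILU on that graph, and the resulting level graph is attached to
          the factor matrix through a PetscContainer so MatSolve_MPIAIJ() can fetch it */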
2257   process_group_type pg;
2258   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2259   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2260   lgraph_type&   level_graph = *lgraph_p;
2261   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2262 
2263   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2264   ilu_permuted(level_graph);
2265 
2266   /* put together the new matrix */
2267   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2268   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2269   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2270   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2271   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2272   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274 
2275   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2276   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2277   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2283 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2284 {
2285   PetscFunctionBegin;
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #undef __FUNCT__
2290 #define __FUNCT__ "MatSolve_MPIAIJ"
2291 /*
2292   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2293 */
2294 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2295 {
2296   namespace graph_dist = boost::graph::distributed;
2297 
2298   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2299   lgraph_type*   lgraph_p;
2300   PetscContainer c;
2301   PetscErrorCode ierr;
2302 
2303   PetscFunctionBegin;
2304   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2305   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2306   ierr = VecCopy(b, x);CHKERRQ(ierr);
2307 
2308   PetscScalar* array_x;
2309   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2310   PetscInt sx;
2311   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2312 
2313   PetscScalar* array_b;
2314   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2315   PetscInt sb;
2316   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2317 
2318   lgraph_type&   level_graph = *lgraph_p;
2319   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2320 
2321   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2322   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2323                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2324 
2325   typedef boost::iterator_property_map<array_ref_type::iterator,
2326                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2327   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2328                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2329 
2330   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2331 
2332   PetscFunctionReturn(0);
2333 }
2334 #endif
2335 
2336 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2337   PetscInt       nzlocal,nsends,nrecvs;
2338   PetscMPIInt    *send_rank,*recv_rank;
2339   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2340   PetscScalar    *sbuf_a,**rbuf_a;
2341   PetscErrorCode (*MatDestroy)(Mat);
2342 } Mat_Redundant;
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2346 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2347 {
2348   PetscErrorCode       ierr;
2349   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2350   PetscInt             i;
2351 
2352   PetscFunctionBegin;
2353   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2354   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2355   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2356   for (i=0; i<redund->nrecvs; i++){
2357     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2358     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2359   }
2360   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2361   ierr = PetscFree(redund);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatDestroy_MatRedundant"
2367 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2368 {
2369   PetscErrorCode  ierr;
2370   PetscContainer  container;
2371   Mat_Redundant   *redund=PETSC_NULL;
2372 
2373   PetscFunctionBegin;
2374   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2375   if (container) {
2376     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2377   } else {
2378     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2379   }
2380   A->ops->destroy = redund->MatDestroy;
2381   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2382   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2383   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
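     /*
        Builds a complete copy of mat on each subcommunicator. A minimal calling
        sketch through the public interface (a hedged illustration only; it assumes
        the subcommunicators were created elsewhere, e.g. by PCREDUNDANT via
        PetscSubcomm):

          Mat red;
          ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&red);CHKERRQ(ierr);
          ...  use red, whose communicator is subcomm  ...
          ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&red);CHKERRQ(ierr);
          ierr = MatDestroy(red);CHKERRQ(ierr);
     */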
2389 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2390 {
2391   PetscMPIInt    rank,size;
2392   MPI_Comm       comm=((PetscObject)mat)->comm;
2393   PetscErrorCode ierr;
2394   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2395   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2396   PetscInt       *rowrange=mat->rmap->range;
2397   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2398   Mat            A=aij->A,B=aij->B,C=*matredundant;
2399   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2400   PetscScalar    *sbuf_a;
2401   PetscInt       nzlocal=a->nz+b->nz;
2402   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2403   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2404   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2405   MatScalar      *aworkA,*aworkB;
2406   PetscScalar    *vals;
2407   PetscMPIInt    tag1,tag2,tag3,imdex;
2408   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2409                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2410   MPI_Status     recv_status,*send_status;
2411   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2412   PetscInt       **rbuf_j=PETSC_NULL;
2413   PetscScalar    **rbuf_a=PETSC_NULL;
2414   Mat_Redundant  *redund=PETSC_NULL;
2415   PetscContainer container;
2416 
2417   PetscFunctionBegin;
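       /* outline: every rank packs its local rows in CSR-like form (row pointers and
          global column indices in sbuf_j, values in sbuf_a), exchanges the buffers
          with the ranks that own the same row block in the other subcommunicators,
          and each subcommunicator then assembles its own complete copy of mat */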
2418   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2419   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2420 
2421   if (reuse == MAT_REUSE_MATRIX) {
2422     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2423     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2424     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2425     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2426     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2427     if (container) {
2428       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2429     } else {
2430       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2431     }
2432     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2433 
2434     nsends    = redund->nsends;
2435     nrecvs    = redund->nrecvs;
2436     send_rank = redund->send_rank;
2437     recv_rank = redund->recv_rank;
2438     sbuf_nz   = redund->sbuf_nz;
2439     rbuf_nz   = redund->rbuf_nz;
2440     sbuf_j    = redund->sbuf_j;
2441     sbuf_a    = redund->sbuf_a;
2442     rbuf_j    = redund->rbuf_j;
2443     rbuf_a    = redund->rbuf_a;
2444   }
2445 
2446   if (reuse == MAT_INITIAL_MATRIX){
2447     PetscMPIInt  subrank,subsize;
2448     PetscInt     nleftover,np_subcomm;
2449     /* get the destination processors' id send_rank, nsends and nrecvs */
2450     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2451     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2452     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2453     np_subcomm = size/nsubcomm;
2454     nleftover  = size - nsubcomm*np_subcomm;
2455     nsends = 0; nrecvs = 0;
2456     for (i=0; i<size; i++){ /* i=rank*/
2457       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2458         send_rank[nsends] = i; nsends++;
2459         recv_rank[nrecvs++] = i;
2460       }
2461     }
2462     if (rank >= size - nleftover){/* this proc is a leftover processor */
2463       i = size-nleftover-1;
2464       j = 0;
2465       while (j < nsubcomm - nleftover){
2466         send_rank[nsends++] = i;
2467         i--; j++;
2468       }
2469     }
2470 
2471     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2472       for (i=0; i<nleftover; i++){
2473         recv_rank[nrecvs++] = size-nleftover+i;
2474       }
2475     }
2476 
2477     /* allocate sbuf_j, sbuf_a */
2478     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2479     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2480     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2481   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2482 
2483   /* copy mat's local entries into the buffers */
2484   if (reuse == MAT_INITIAL_MATRIX){
2485     rownz_max = 0;
2486     rptr = sbuf_j;
2487     cols = sbuf_j + rend-rstart + 1;
2488     vals = sbuf_a;
2489     rptr[0] = 0;
2490     for (i=0; i<rend-rstart; i++){
2491       row = i + rstart;
2492       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2493       ncols  = nzA + nzB;
2494       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2495       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2496       /* load the column indices for this row into cols */
2497       lwrite = 0;
2498       for (l=0; l<nzB; l++) {
2499         if ((ctmp = bmap[cworkB[l]]) < cstart){
2500           vals[lwrite]   = aworkB[l];
2501           cols[lwrite++] = ctmp;
2502         }
2503       }
2504       for (l=0; l<nzA; l++){
2505         vals[lwrite]   = aworkA[l];
2506         cols[lwrite++] = cstart + cworkA[l];
2507       }
2508       for (l=0; l<nzB; l++) {
2509         if ((ctmp = bmap[cworkB[l]]) >= cend){
2510           vals[lwrite]   = aworkB[l];
2511           cols[lwrite++] = ctmp;
2512         }
2513       }
2514       vals += ncols;
2515       cols += ncols;
2516       rptr[i+1] = rptr[i] + ncols;
2517       if (rownz_max < ncols) rownz_max = ncols;
2518     }
2519     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2520   } else { /* only copy matrix values into sbuf_a */
2521     rptr = sbuf_j;
2522     vals = sbuf_a;
2523     rptr[0] = 0;
2524     for (i=0; i<rend-rstart; i++){
2525       row = i + rstart;
2526       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2527       ncols  = nzA + nzB;
2528       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2529       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2530       lwrite = 0;
2531       for (l=0; l<nzB; l++) {
2532         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2533       }
2534       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2535       for (l=0; l<nzB; l++) {
2536         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2537       }
2538       vals += ncols;
2539       rptr[i+1] = rptr[i] + ncols;
2540     }
2541   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2542 
2543   /* send nzlocal to others, and recv other's nzlocal */
2544   /*--------------------------------------------------*/
2545   if (reuse == MAT_INITIAL_MATRIX){
2546     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2547     s_waits2 = s_waits3 + nsends;
2548     s_waits1 = s_waits2 + nsends;
2549     r_waits1 = s_waits1 + nsends;
2550     r_waits2 = r_waits1 + nrecvs;
2551     r_waits3 = r_waits2 + nrecvs;
2552   } else {
2553     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2554     r_waits3 = s_waits3 + nsends;
2555   }
2556 
2557   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2558   if (reuse == MAT_INITIAL_MATRIX){
2559     /* get new tags to keep the communication clean */
2560     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2561     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2562     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2563 
2564     /* post receives of other's nzlocal */
2565     for (i=0; i<nrecvs; i++){
2566       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2567     }
2568     /* send nzlocal to others */
2569     for (i=0; i<nsends; i++){
2570       sbuf_nz[i] = nzlocal;
2571       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2572     }
2573     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2574     count = nrecvs;
2575     while (count) {
2576       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2577       recv_rank[imdex] = recv_status.MPI_SOURCE;
2578       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2579       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2580 
2581       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2582       rbuf_nz[imdex] += i + 2;
2583       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2584       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2585       count--;
2586     }
2587     /* wait on sends of nzlocal */
2588     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2589     /* send mat->i,j to others, and recv from other's */
2590     /*------------------------------------------------*/
2591     for (i=0; i<nsends; i++){
2592       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2593       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2594     }
2595     /* wait on receives of mat->i,j */
2596     /*------------------------------*/
2597     count = nrecvs;
2598     while (count) {
2599       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2600       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2601       count--;
2602     }
2603     /* wait on sends of mat->i,j */
2604     /*---------------------------*/
2605     if (nsends) {
2606       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2607     }
2608   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2609 
2610   /* post receives, send and receive mat->a */
2611   /*----------------------------------------*/
2612   for (imdex=0; imdex<nrecvs; imdex++) {
2613     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2614   }
2615   for (i=0; i<nsends; i++){
2616     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2617   }
2618   count = nrecvs;
2619   while (count) {
2620     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2621     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2622     count--;
2623   }
2624   if (nsends) {
2625     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2626   }
2627 
2628   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2629 
2630   /* create redundant matrix */
2631   /*-------------------------*/
2632   if (reuse == MAT_INITIAL_MATRIX){
2633     /* compute rownz_max for preallocation */
2634     for (imdex=0; imdex<nrecvs; imdex++){
2635       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2636       rptr = rbuf_j[imdex];
2637       for (i=0; i<j; i++){
2638         ncols = rptr[i+1] - rptr[i];
2639         if (rownz_max < ncols) rownz_max = ncols;
2640       }
2641     }
2642 
2643     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2644     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2645     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2646     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2647     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2648   } else {
2649     C = *matredundant;
2650   }
2651 
2652   /* insert local matrix entries */
2653   rptr = sbuf_j;
2654   cols = sbuf_j + rend-rstart + 1;
2655   vals = sbuf_a;
2656   for (i=0; i<rend-rstart; i++){
2657     row   = i + rstart;
2658     ncols = rptr[i+1] - rptr[i];
2659     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2660     vals += ncols;
2661     cols += ncols;
2662   }
2663   /* insert received matrix entries */
2664   for (imdex=0; imdex<nrecvs; imdex++){
2665     rstart = rowrange[recv_rank[imdex]];
2666     rend   = rowrange[recv_rank[imdex]+1];
2667     rptr = rbuf_j[imdex];
2668     cols = rbuf_j[imdex] + rend-rstart + 1;
2669     vals = rbuf_a[imdex];
2670     for (i=0; i<rend-rstart; i++){
2671       row   = i + rstart;
2672       ncols = rptr[i+1] - rptr[i];
2673       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2674       vals += ncols;
2675       cols += ncols;
2676     }
2677   }
2678   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2679   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2680   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2681   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d %d != input mat size %d %d",M,N,mat->rmap->N,mat->cmap->N);
2682   if (reuse == MAT_INITIAL_MATRIX){
2683     PetscContainer container;
2684     *matredundant = C;
2685     /* create a supporting struct and attach it to C for reuse */
2686     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2687     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2688     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2689     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2690     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2691 
2692     redund->nzlocal = nzlocal;
2693     redund->nsends  = nsends;
2694     redund->nrecvs  = nrecvs;
2695     redund->send_rank = send_rank;
2696     redund->recv_rank = recv_rank;
2697     redund->sbuf_nz = sbuf_nz;
2698     redund->rbuf_nz = rbuf_nz;
2699     redund->sbuf_j  = sbuf_j;
2700     redund->sbuf_a  = sbuf_a;
2701     redund->rbuf_j  = rbuf_j;
2702     redund->rbuf_a  = rbuf_a;
2703 
2704     redund->MatDestroy = C->ops->destroy;
2705     C->ops->destroy    = MatDestroy_MatRedundant;
2706   }
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2712 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2713 {
2714   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2715   PetscErrorCode ierr;
2716   PetscInt       i,*idxb = 0;
2717   PetscScalar    *va,*vb;
2718   Vec            vtmp;
2719 
2720   PetscFunctionBegin;
2721   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2722   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2723   if (idx) {
2724     for (i=0; i<A->rmap->n; i++) {
2725       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2726     }
2727   }
2728 
2729   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2730   if (idx) {
2731     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2732   }
2733   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2734   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2735 
2736   for (i=0; i<A->rmap->n; i++){
2737     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2738       va[i] = vb[i];
2739       if (idx) idx[i] = a->garray[idxb[i]];
2740     }
2741   }
2742 
2743   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2744   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2745   ierr = PetscFree(idxb);CHKERRQ(ierr);
2746   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 
2750 #undef __FUNCT__
2751 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2752 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2753 {
2754   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2755   PetscErrorCode ierr;
2756   PetscInt       i,*idxb = 0;
2757   PetscScalar    *va,*vb;
2758   Vec            vtmp;
2759 
2760   PetscFunctionBegin;
2761   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2762   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2763   if (idx) {
2764     for (i=0; i<A->rmap->n; i++) {
2765       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2766     }
2767   }
2768 
2769   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2770   if (idx) {
2771     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2772   }
2773   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2774   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2775 
2776   for (i=0; i<A->rmap->n; i++){
2777     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2778       va[i] = vb[i];
2779       if (idx) idx[i] = a->garray[idxb[i]];
2780     }
2781   }
2782 
2783   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2784   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2785   ierr = PetscFree(idxb);CHKERRQ(ierr);
2786   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2792 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2793 {
2794   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2795   PetscInt       n      = A->rmap->n;
2796   PetscInt       cstart = A->cmap->rstart;
2797   PetscInt      *cmap   = mat->garray;
2798   PetscInt      *diagIdx, *offdiagIdx;
2799   Vec            diagV, offdiagV;
2800   PetscScalar   *a, *diagA, *offdiagA;
2801   PetscInt       r;
2802   PetscErrorCode ierr;
2803 
2804   PetscFunctionBegin;
2805   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2806   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2807   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2808   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2809   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2810   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2811   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2812   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2813   for(r = 0; r < n; ++r) {
2814     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2815       a[r]   = diagA[r];
2816       idx[r] = cstart + diagIdx[r];
2817     } else {
2818       a[r]   = offdiagA[r];
2819       idx[r] = cmap[offdiagIdx[r]];
2820     }
2821   }
2822   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2823   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2824   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2825   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2826   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2827   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 #undef __FUNCT__
2832 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2833 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2834 {
2835   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2836   PetscInt       n      = A->rmap->n;
2837   PetscInt       cstart = A->cmap->rstart;
2838   PetscInt      *cmap   = mat->garray;
2839   PetscInt      *diagIdx, *offdiagIdx;
2840   Vec            diagV, offdiagV;
2841   PetscScalar   *a, *diagA, *offdiagA;
2842   PetscInt       r;
2843   PetscErrorCode ierr;
2844 
2845   PetscFunctionBegin;
2846   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2847   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2848   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2849   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2850   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2851   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2852   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2853   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2854   for(r = 0; r < n; ++r) {
2855     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2856       a[r]   = diagA[r];
2857       idx[r] = cstart + diagIdx[r];
2858     } else {
2859       a[r]   = offdiagA[r];
2860       idx[r] = cmap[offdiagIdx[r]];
2861     }
2862   }
2863   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2864   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2865   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2866   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2867   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2868   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2869   PetscFunctionReturn(0);
2870 }
2871 
2872 #undef __FUNCT__
2873 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2874 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2875 {
2876   PetscErrorCode ierr;
2877   Mat            *dummy;
2878 
2879   PetscFunctionBegin;
2880   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2881   *newmat = *dummy;
2882   ierr = PetscFree(dummy);CHKERRQ(ierr);
2883   PetscFunctionReturn(0);
2884 }
2885 
2886 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2887 /* -------------------------------------------------------------------*/
2888 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2889        MatGetRow_MPIAIJ,
2890        MatRestoreRow_MPIAIJ,
2891        MatMult_MPIAIJ,
2892 /* 4*/ MatMultAdd_MPIAIJ,
2893        MatMultTranspose_MPIAIJ,
2894        MatMultTransposeAdd_MPIAIJ,
2895 #ifdef PETSC_HAVE_PBGL
2896        MatSolve_MPIAIJ,
2897 #else
2898        0,
2899 #endif
2900        0,
2901        0,
2902 /*10*/ 0,
2903        0,
2904        0,
2905        MatSOR_MPIAIJ,
2906        MatTranspose_MPIAIJ,
2907 /*15*/ MatGetInfo_MPIAIJ,
2908        MatEqual_MPIAIJ,
2909        MatGetDiagonal_MPIAIJ,
2910        MatDiagonalScale_MPIAIJ,
2911        MatNorm_MPIAIJ,
2912 /*20*/ MatAssemblyBegin_MPIAIJ,
2913        MatAssemblyEnd_MPIAIJ,
2914        MatSetOption_MPIAIJ,
2915        MatZeroEntries_MPIAIJ,
2916 /*24*/ MatZeroRows_MPIAIJ,
2917        0,
2918 #ifdef PETSC_HAVE_PBGL
2919        0,
2920 #else
2921        0,
2922 #endif
2923        0,
2924        0,
2925 /*29*/ MatSetUpPreallocation_MPIAIJ,
2926 #ifdef PETSC_HAVE_PBGL
2927        0,
2928 #else
2929        0,
2930 #endif
2931        0,
2932        0,
2933        0,
2934 /*34*/ MatDuplicate_MPIAIJ,
2935        0,
2936        0,
2937        0,
2938        0,
2939 /*39*/ MatAXPY_MPIAIJ,
2940        MatGetSubMatrices_MPIAIJ,
2941        MatIncreaseOverlap_MPIAIJ,
2942        MatGetValues_MPIAIJ,
2943        MatCopy_MPIAIJ,
2944 /*44*/ MatGetRowMax_MPIAIJ,
2945        MatScale_MPIAIJ,
2946        0,
2947        0,
2948        MatZeroRowsColumns_MPIAIJ,
2949 /*49*/ MatSetBlockSize_MPIAIJ,
2950        0,
2951        0,
2952        0,
2953        0,
2954 /*54*/ MatFDColoringCreate_MPIAIJ,
2955        0,
2956        MatSetUnfactored_MPIAIJ,
2957        MatPermute_MPIAIJ,
2958        0,
2959 /*59*/ MatGetSubMatrix_MPIAIJ,
2960        MatDestroy_MPIAIJ,
2961        MatView_MPIAIJ,
2962        0,
2963        0,
2964 /*64*/ 0,
2965        0,
2966        0,
2967        0,
2968        0,
2969 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2970        MatGetRowMinAbs_MPIAIJ,
2971        0,
2972        MatSetColoring_MPIAIJ,
2973 #if defined(PETSC_HAVE_ADIC)
2974        MatSetValuesAdic_MPIAIJ,
2975 #else
2976        0,
2977 #endif
2978        MatSetValuesAdifor_MPIAIJ,
2979 /*75*/ MatFDColoringApply_AIJ,
2980        0,
2981        0,
2982        0,
2983        0,
2984 /*80*/ 0,
2985        0,
2986        0,
2987 /*83*/ MatLoad_MPIAIJ,
2988        0,
2989        0,
2990        0,
2991        0,
2992        0,
2993 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2994        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2995        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2996        MatPtAP_Basic,
2997        MatPtAPSymbolic_MPIAIJ,
2998 /*94*/ MatPtAPNumeric_MPIAIJ,
2999        0,
3000        0,
3001        0,
3002        0,
3003 /*99*/ 0,
3004        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3005        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3006        MatConjugate_MPIAIJ,
3007        0,
3008 /*104*/MatSetValuesRow_MPIAIJ,
3009        MatRealPart_MPIAIJ,
3010        MatImaginaryPart_MPIAIJ,
3011        0,
3012        0,
3013 /*109*/0,
3014        MatGetRedundantMatrix_MPIAIJ,
3015        MatGetRowMin_MPIAIJ,
3016        0,
3017        0,
3018 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3019        0,
3020        0,
3021        0,
3022        0,
3023 /*119*/0,
3024        0,
3025        0,
3026        0,
3027        MatGetMultiProcBlock_MPIAIJ,
3028 /*124*/MatFindNonZeroRows_MPIAIJ,
3029        0,
3030        0,
3031        0,
3032        MatGetSubMatricesParallel_MPIAIJ
3033 };
3034 
3035 /* ----------------------------------------------------------------------------------------*/
3036 
3037 EXTERN_C_BEGIN
3038 #undef __FUNCT__
3039 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3040 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3041 {
3042   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3043   PetscErrorCode ierr;
3044 
3045   PetscFunctionBegin;
3046   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3047   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3048   PetscFunctionReturn(0);
3049 }
3050 EXTERN_C_END
3051 
3052 EXTERN_C_BEGIN
3053 #undef __FUNCT__
3054 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3055 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3056 {
3057   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3058   PetscErrorCode ierr;
3059 
3060   PetscFunctionBegin;
3061   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3062   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3063   PetscFunctionReturn(0);
3064 }
3065 EXTERN_C_END
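
/*
   A minimal, illustrative usage sketch (assumed, not part of the original file): the
   intended pattern for the two routines above, via the public MatStoreValues()/
   MatRetrieveValues() interface.  Guarded out of the build; the function name is hypothetical.
*/
#if 0
static PetscErrorCode StoreRetrieveExample(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the nonzero pattern must be frozen before values can be stashed */
  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(A);CHKERRQ(ierr);      /* stash a copy of the current numerical values */
  /* ... overwrite the values of A, e.g. to build a shifted preconditioner ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);   /* restore the stashed values */
  PetscFunctionReturn(0);
}
#endif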
3066 
3067 EXTERN_C_BEGIN
3068 #undef __FUNCT__
3069 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3070 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3071 {
3072   Mat_MPIAIJ     *b;
3073   PetscErrorCode ierr;
3074   PetscInt       i;
3075 
3076   PetscFunctionBegin;
3077   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3078   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3079   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3080   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3081 
3082   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3083   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3084   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3085   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3086   if (d_nnz) {
3087     for (i=0; i<B->rmap->n; i++) {
3088       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3089     }
3090   }
3091   if (o_nnz) {
3092     for (i=0; i<B->rmap->n; i++) {
3093       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3094     }
3095   }
3096   b = (Mat_MPIAIJ*)B->data;
3097 
3098   if (!B->preallocated) {
3099     /* Explicitly create 2 MATSEQAIJ matrices. */
3100     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3101     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3102     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3103     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3104     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3105     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3106     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3107     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3108   }
3109 
3110   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3111   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3112   B->preallocated = PETSC_TRUE;
3113   PetscFunctionReturn(0);
3114 }
3115 EXTERN_C_END
3116 
3117 #undef __FUNCT__
3118 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3119 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3120 {
3121   Mat            mat;
3122   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3123   PetscErrorCode ierr;
3124 
3125   PetscFunctionBegin;
3126   *newmat       = 0;
3127   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3128   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3129   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3130   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3131   a    = (Mat_MPIAIJ*)mat->data;
3132 
3133   mat->factortype    = matin->factortype;
3134   mat->rmap->bs      = matin->rmap->bs;
3135   mat->assembled    = PETSC_TRUE;
3136   mat->insertmode   = NOT_SET_VALUES;
3137   mat->preallocated = PETSC_TRUE;
3138 
3139   a->size           = oldmat->size;
3140   a->rank           = oldmat->rank;
3141   a->donotstash     = oldmat->donotstash;
3142   a->roworiented    = oldmat->roworiented;
3143   a->rowindices     = 0;
3144   a->rowvalues      = 0;
3145   a->getrowactive   = PETSC_FALSE;
3146 
3147   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3148   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3149 
3150   if (oldmat->colmap) {
3151 #if defined (PETSC_USE_CTABLE)
3152     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3153 #else
3154     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3155     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3156     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3157 #endif
3158   } else a->colmap = 0;
3159   if (oldmat->garray) {
3160     PetscInt len;
3161     len  = oldmat->B->cmap->n;
3162     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3163     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3164     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3165   } else a->garray = 0;
3166 
3167   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3168   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3169   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3170   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3171   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3172   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3173   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3174   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3175   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3176   *newmat = mat;
3177   PetscFunctionReturn(0);
3178 }
3179 
3180 /*
3181     Allows sending/receiving larger messages than 2 gigabytes in a single call
3182 */
3183 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3184 {
3185   int             ierr;
3186   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3187   PetscInt        i,numchunks;
3188   PetscMPIInt     icnt;
3189 
3190   numchunks = cnt/CHUNKSIZE + 1;
3191   for (i=0; i<numchunks; i++) {
3192     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3193     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
3194     if (type == MPIU_INT) {
3195       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3196     } else if (type == MPIU_SCALAR) {
3197       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3198     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3199   }
3200   return 0;
3201 }
3202 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3203 {
3204   int             ierr;
3205   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3206   MPI_Status      status;
3207   PetscInt        i,numchunks;
3208   PetscMPIInt     icnt;
3209 
3210   numchunks = cnt/CHUNKSIZE + 1;
3211   for (i=0; i<numchunks; i++) {
3212     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3213     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
3214     if (type == MPIU_INT) {
3215       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3216     } else if (type == MPIU_SCALAR) {
3217       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3218     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3219   }
3220   return 0;
3221 }
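
/*
   A minimal, illustrative sketch (assumed, not part of the original file) of how the two
   static helpers above pair up: both sides pass the same total count, which is internally
   split into CHUNKSIZE-sized pieces matched one-for-one.  Guarded out of the build; the
   ranks and function name are hypothetical.
*/
#if 0
static PetscErrorCode MPILongPairExample(MPI_Comm comm,PetscInt N,PetscScalar *buf,PetscMPIInt tag)
{
  PetscMPIInt rank;
  int         ierr;

  ierr = MPI_Comm_rank(comm,&rank);if (ierr) return ierr;
  if (!rank) {            /* rank 0 ships N scalars, possibly more than an int's worth of bytes */
    ierr = MPILong_Send(buf,N,MPIU_SCALAR,1,tag,comm);if (ierr) return ierr;
  } else if (rank == 1) { /* rank 1 posts the matching chunked receives */
    ierr = MPILong_Recv(buf,N,MPIU_SCALAR,0,tag,comm);if (ierr) return ierr;
  }
  return 0;
}
#endif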
3222 
3223 #undef __FUNCT__
3224 #define __FUNCT__ "MatLoad_MPIAIJ"
3225 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3226 {
3227   PetscScalar    *vals,*svals;
3228   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3229   PetscErrorCode ierr;
3230   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3231   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3232   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3233   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3234   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3235   int            fd;
3236 
3237   PetscFunctionBegin;
3238   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3239   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3240   if (!rank) {
3241     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3242     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3243     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3244   }
3245 
3246   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3247 
3248   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3249   M = header[1]; N = header[2];
3250   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3251   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3252   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3253 
3254   /* If global sizes are set, check if they are consistent with that given in the file */
3255   if (sizesset) {
3256     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3257   }
3258   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3259   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3260 
3261   /* determine ownership of all rows */
3262   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3263   else m = newMat->rmap->n; /* Set by user */
3264 
3265   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3266   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3267 
3268   /* First process needs enough room for process with most rows */
3269   if (!rank) {
3270     mmax       = rowners[1];
3271     for (i=2; i<size; i++) {
3272       mmax = PetscMax(mmax,rowners[i]);
3273     }
3274   } else mmax = m;
3275 
3276   rowners[0] = 0;
3277   for (i=2; i<=size; i++) {
3278     rowners[i] += rowners[i-1];
3279   }
3280   rstart = rowners[rank];
3281   rend   = rowners[rank+1];
3282 
3283   /* distribute row lengths to all processors */
3284   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3285   if (!rank) {
3286     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3287     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3288     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3289     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3290     for (j=0; j<m; j++) {
3291       procsnz[0] += ourlens[j];
3292     }
3293     for (i=1; i<size; i++) {
3294       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3295       /* calculate the number of nonzeros on each processor */
3296       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3297         procsnz[i] += rowlengths[j];
3298       }
3299       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3300     }
3301     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3302   } else {
3303     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3304   }
3305 
3306   if (!rank) {
3307     /* determine max buffer needed and allocate it */
3308     maxnz = 0;
3309     for (i=0; i<size; i++) {
3310       maxnz = PetscMax(maxnz,procsnz[i]);
3311     }
3312     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3313 
3314     /* read in my part of the matrix column indices  */
3315     nz   = procsnz[0];
3316     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3317     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3318 
3319     /* read in everyone else's and ship off */
3320     for (i=1; i<size; i++) {
3321       nz     = procsnz[i];
3322       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3323       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3324     }
3325     ierr = PetscFree(cols);CHKERRQ(ierr);
3326   } else {
3327     /* determine buffer space needed for message */
3328     nz = 0;
3329     for (i=0; i<m; i++) {
3330       nz += ourlens[i];
3331     }
3332     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3333 
3334     /* receive message of column indices*/
3335     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3336   }
3337 
3338   /* determine column ownership if matrix is not square */
3339   if (N != M) {
3340     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3341     else n = newMat->cmap->n;
3342     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3343     cstart = cend - n;
3344   } else {
3345     cstart = rstart;
3346     cend   = rend;
3347     n      = cend - cstart;
3348   }
3349 
3350   /* loop over local rows, determining number of off diagonal entries */
3351   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3352   jj = 0;
3353   for (i=0; i<m; i++) {
3354     for (j=0; j<ourlens[i]; j++) {
3355       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3356       jj++;
3357     }
3358   }
3359 
3360   for (i=0; i<m; i++) {
3361     ourlens[i] -= offlens[i];
3362   }
3363   if (!sizesset) {
3364     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3365   }
3366   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3367 
3368   for (i=0; i<m; i++) {
3369     ourlens[i] += offlens[i];
3370   }
3371 
3372   if (!rank) {
3373     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3374 
3375     /* read in my part of the matrix numerical values  */
3376     nz   = procsnz[0];
3377     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3378 
3379     /* insert into matrix */
3380     jj      = rstart;
3381     smycols = mycols;
3382     svals   = vals;
3383     for (i=0; i<m; i++) {
3384       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3385       smycols += ourlens[i];
3386       svals   += ourlens[i];
3387       jj++;
3388     }
3389 
3390     /* read in other processors and ship out */
3391     for (i=1; i<size; i++) {
3392       nz     = procsnz[i];
3393       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3394       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3395     }
3396     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3397   } else {
3398     /* receive numeric values */
3399     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3400 
3401     /* receive message of values*/
3402     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3403 
3404     /* insert into matrix */
3405     jj      = rstart;
3406     smycols = mycols;
3407     svals   = vals;
3408     for (i=0; i<m; i++) {
3409       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3410       smycols += ourlens[i];
3411       svals   += ourlens[i];
3412       jj++;
3413     }
3414   }
3415   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3416   ierr = PetscFree(vals);CHKERRQ(ierr);
3417   ierr = PetscFree(mycols);CHKERRQ(ierr);
3418   ierr = PetscFree(rowners);CHKERRQ(ierr);
3419 
3420   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3421   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3422   PetscFunctionReturn(0);
3423 }
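
/*
   A minimal, illustrative usage sketch (assumed, not part of the original file): loading a
   matrix stored in PETSc binary format so that the loader above is invoked through MatLoad().
   Guarded out of the build; the file name and function name are hypothetical.
*/
#if 0
static PetscErrorCode LoadExample(MPI_Comm comm,Mat *A)
{
  PetscViewer    viewer;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);   /* select the type so MatLoad_MPIAIJ() is dispatched */
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif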
3424 
3425 #undef __FUNCT__
3426 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3427 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3428 {
3429   PetscErrorCode ierr;
3430   IS             iscol_local;
3431   PetscInt       csize;
3432 
3433   PetscFunctionBegin;
3434   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3435   if (call == MAT_REUSE_MATRIX) {
3436     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3437     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3438   } else {
3439     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3440   }
3441   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3442   if (call == MAT_INITIAL_MATRIX) {
3443     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3444     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3445   }
3446   PetscFunctionReturn(0);
3447 }
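
/*
   A minimal, illustrative sketch (assumed, not part of the original file) of the reuse
   pattern implemented above: the first extraction must use MAT_INITIAL_MATRIX, which
   composes the gathered "ISAllGather" index set on the submatrix; later calls can refill
   the same submatrix with MAT_REUSE_MATRIX.  Guarded out of the build; the function name
   is hypothetical.
*/
#if 0
static PetscErrorCode SubMatrixReuseExample(Mat A,IS isrow,IS iscol)
{
  Mat            B;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); /* builds B and stashes the gathered column IS */
  /* ... change the numerical values of A, keeping the same nonzero pattern ... */
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);   /* refills the existing B */
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif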
3448 
3449 #undef __FUNCT__
3450 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3451 /*
3452     Not great since it makes two copies of the submatrix: first a SeqAIJ
3453   in local, and then the end result obtained by concatenating the local matrices.
3454   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3455 
3456   Note: This requires a sequential iscol with all indices.
3457 */
3458 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3459 {
3460   PetscErrorCode ierr;
3461   PetscMPIInt    rank,size;
3462   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3463   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3464   Mat            *local,M,Mreuse;
3465   MatScalar      *vwork,*aa;
3466   MPI_Comm       comm = ((PetscObject)mat)->comm;
3467   Mat_SeqAIJ     *aij;
3468 
3469 
3470   PetscFunctionBegin;
3471   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3472   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3473 
3474   if (call ==  MAT_REUSE_MATRIX) {
3475     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3476     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3477     local = &Mreuse;
3478     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3479   } else {
3480     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3481     Mreuse = *local;
3482     ierr   = PetscFree(local);CHKERRQ(ierr);
3483   }
3484 
3485   /*
3486       m - number of local rows
3487       n - number of columns (same on all processors)
3488       rstart - first row in new global matrix generated
3489   */
3490   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3491   if (call == MAT_INITIAL_MATRIX) {
3492     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3493     ii  = aij->i;
3494     jj  = aij->j;
3495 
3496     /*
3497         Determine the number of non-zeros in the diagonal and off-diagonal
3498         portions of the matrix in order to do correct preallocation
3499     */
3500 
3501     /* first get start and end of "diagonal" columns */
3502     if (csize == PETSC_DECIDE) {
3503       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3504       if (mglobal == n) { /* square matrix */
3505         nlocal = m;
3506       } else {
3507         nlocal = n/size + ((n % size) > rank);
3508       }
3509     } else {
3510       nlocal = csize;
3511     }
3512     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3513     rstart = rend - nlocal;
3514     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3515 
3516     /* next, compute all the lengths */
3517     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3518     olens = dlens + m;
3519     for (i=0; i<m; i++) {
3520       jend = ii[i+1] - ii[i];
3521       olen = 0;
3522       dlen = 0;
3523       for (j=0; j<jend; j++) {
3524         if (*jj < rstart || *jj >= rend) olen++;
3525         else dlen++;
3526         jj++;
3527       }
3528       olens[i] = olen;
3529       dlens[i] = dlen;
3530     }
3531     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3532     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3533     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3534     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3535     ierr = PetscFree(dlens);CHKERRQ(ierr);
3536   } else {
3537     PetscInt ml,nl;
3538 
3539     M = *newmat;
3540     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3541     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3542     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3543     /*
3544          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3545        rather than the slower MatSetValues().
3546     */
3547     M->was_assembled = PETSC_TRUE;
3548     M->assembled     = PETSC_FALSE;
3549   }
3550   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3551   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3552   ii  = aij->i;
3553   jj  = aij->j;
3554   aa  = aij->a;
3555   for (i=0; i<m; i++) {
3556     row   = rstart + i;
3557     nz    = ii[i+1] - ii[i];
3558     cwork = jj;     jj += nz;
3559     vwork = aa;     aa += nz;
3560     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3561   }
3562 
3563   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3564   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3565   *newmat = M;
3566 
3567   /* save submatrix used in processor for next request */
3568   if (call ==  MAT_INITIAL_MATRIX) {
3569     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3570     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3571   }
3572 
3573   PetscFunctionReturn(0);
3574 }
3575 
3576 EXTERN_C_BEGIN
3577 #undef __FUNCT__
3578 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3579 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3580 {
3581   PetscInt       m,cstart, cend,j,nnz,i,d;
3582   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3583   const PetscInt *JJ;
3584   PetscScalar    *values;
3585   PetscErrorCode ierr;
3586 
3587   PetscFunctionBegin;
3588   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3589 
3590   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3591   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3592   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3593   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3594   m      = B->rmap->n;
3595   cstart = B->cmap->rstart;
3596   cend   = B->cmap->rend;
3597   rstart = B->rmap->rstart;
3598 
3599   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3600 
3601 #if defined(PETSC_USE_DEBUG)
3602   for (i=0; i<m; i++) {
3603     nnz     = Ii[i+1]- Ii[i];
3604     JJ      = J + Ii[i];
3605     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3606     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3607     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3608   }
3609 #endif
3610 
3611   for (i=0; i<m; i++) {
3612     nnz     = Ii[i+1]- Ii[i];
3613     JJ      = J + Ii[i];
3614     nnz_max = PetscMax(nnz_max,nnz);
3615     d       = 0;
3616     for (j=0; j<nnz; j++) {
3617       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3618     }
3619     d_nnz[i] = d;
3620     o_nnz[i] = nnz - d;
3621   }
3622   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3623   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3624 
3625   if (v) values = (PetscScalar*)v;
3626   else {
3627     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3628     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3629   }
3630 
3631   for (i=0; i<m; i++) {
3632     ii   = i + rstart;
3633     nnz  = Ii[i+1]- Ii[i];
3634     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3635   }
3636   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638 
3639   if (!v) {
3640     ierr = PetscFree(values);CHKERRQ(ierr);
3641   }
3642   PetscFunctionReturn(0);
3643 }
3644 EXTERN_C_END
3645 
3646 #undef __FUNCT__
3647 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3648 /*@
3649    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3650    (the default parallel PETSc format).
3651 
3652    Collective on MPI_Comm
3653 
3654    Input Parameters:
3655 +  B - the matrix
3656 .  i - the indices into j for the start of each local row (starts with zero)
3657 .  j - the column indices for each local row (starts with zero)
3658 -  v - optional values in the matrix
3659 
3660    Level: developer
3661 
3662    Notes:
3663        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3664      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3665      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3666 
3667        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3668 
3669        The format used for the sparse matrix input is equivalent to a
3670     row-major ordering, i.e., for the following matrix the input data expected is
3671     as shown:
3672 
3673         1 0 0
3674         2 0 3     P0
3675        -------
3676         4 5 6     P1
3677 
3678      Process0 [P0]: rows_owned=[0,1]
3679         i =  {0,1,3}  [size = nrow+1  = 2+1]
3680         j =  {0,0,2}  [size = nz = 3]
3681         v =  {1,2,3}  [size = nz = 3]
3682 
3683      Process1 [P1]: rows_owned=[2]
3684         i =  {0,3}    [size = nrow+1  = 1+1]
3685         j =  {0,1,2}  [size = nz = 3]
3686         v =  {4,5,6}  [size = nz = 3]
3687 
3688 .keywords: matrix, aij, compressed row, sparse, parallel
3689 
3690 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3691           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3692 @*/
3693 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3694 {
3695   PetscErrorCode ierr;
3696 
3697   PetscFunctionBegin;
3698   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3699   PetscFunctionReturn(0);
3700 }
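
/*
   A minimal, illustrative usage sketch (assumed, not part of the original file): assembling
   the 3x3 example from the manual page above with local CSR arrays, assuming a communicator
   with exactly two processes (P0 owns rows 0-1, P1 owns row 2).  Guarded out of the build;
   the function name is hypothetical.
*/
#if 0
static PetscErrorCode PreallocationCSRExample(MPI_Comm comm,Mat *A)
{
  PetscMPIInt       rank;
  PetscErrorCode    ierr;
  const PetscInt    i0[] = {0,1,3},  i1[] = {0,3};   /* local row pointers */
  const PetscInt    j0[] = {0,0,2},  j1[] = {0,1,2}; /* local column indices */
  const PetscScalar v0[] = {1,2,3},  v1[] = {4,5,6}; /* local values */

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,!rank ? 2 : 1,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* copies the arrays, preallocates exactly, inserts the values and assembles */
  if (!rank) {
    ierr = MatMPIAIJSetPreallocationCSR(*A,i0,j0,v0);CHKERRQ(ierr);
  } else {
    ierr = MatMPIAIJSetPreallocationCSR(*A,i1,j1,v1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif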
3701 
3702 #undef __FUNCT__
3703 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3704 /*@C
3705    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3706    (the default parallel PETSc format).  For good matrix assembly performance
3707    the user should preallocate the matrix storage by setting the parameters
3708    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3709    performance can be increased by more than a factor of 50.
3710 
3711    Collective on MPI_Comm
3712 
3713    Input Parameters:
3714 +  A - the matrix
3715 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3716            (same value is used for all local rows)
3717 .  d_nnz - array containing the number of nonzeros in the various rows of the
3718            DIAGONAL portion of the local submatrix (possibly different for each row)
3719            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3720            The size of this array is equal to the number of local rows, i.e 'm'.
3721            You must leave room for the diagonal entry even if it is zero.
3722 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3723            submatrix (same value is used for all local rows).
3724 -  o_nnz - array containing the number of nonzeros in the various rows of the
3725            OFF-DIAGONAL portion of the local submatrix (possibly different for
3726            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3727            structure. The size of this array is equal to the number
3728            of local rows, i.e 'm'.
3729 
3730    If the *_nnz parameter is given then the *_nz parameter is ignored
3731 
3732    The AIJ format (also called the Yale sparse matrix format or
3733    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3734    storage.  The stored row and column indices begin with zero.
3735    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3736 
3737    The parallel matrix is partitioned such that the first m0 rows belong to
3738    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3739    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3740 
3741    The DIAGONAL portion of the local submatrix of a processor can be defined
3742    as the submatrix obtained by extracting the part corresponding to
3743    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3744    first row that belongs to the processor, r2 is the last row belonging to
3745    this processor, and c1-c2 is the range of indices of the local part of a
3746    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3747    common case of a square matrix, the row and column ranges are the same and
3748    the DIAGONAL part is also square. The remaining portion of the local
3749    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3750 
3751    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3752 
3753    You can call MatGetInfo() to get information on how effective the preallocation was;
3754    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3755    You can also run with the option -info and look for messages with the string
3756    malloc in them to see if additional memory allocation was needed.
3757 
3758    Example usage:
3759 
3760    Consider the following 8x8 matrix with 34 non-zero values, that is
3761    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3762    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3763    as follows:
3764 
3765 .vb
3766             1  2  0  |  0  3  0  |  0  4
3767     Proc0   0  5  6  |  7  0  0  |  8  0
3768             9  0 10  | 11  0  0  | 12  0
3769     -------------------------------------
3770            13  0 14  | 15 16 17  |  0  0
3771     Proc1   0 18  0  | 19 20 21  |  0  0
3772             0  0  0  | 22 23  0  | 24  0
3773     -------------------------------------
3774     Proc2  25 26 27  |  0  0 28  | 29  0
3775            30  0  0  | 31 32 33  |  0 34
3776 .ve
3777 
3778    This can be represented as a collection of submatrices as:
3779 
3780 .vb
3781       A B C
3782       D E F
3783       G H I
3784 .ve
3785 
3786    Where the submatrices A,B,C are owned by proc0, D,E,F are
3787    owned by proc1, G,H,I are owned by proc2.
3788 
3789    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3790    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3791    The 'M','N' parameters are 8,8, and have the same values on all procs.
3792 
3793    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3794    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3795    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3796    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3797    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3798    matrix, and [DF] as another SeqAIJ matrix.
3799 
3800    When d_nz, o_nz parameters are specified, d_nz storage elements are
3801    allocated for every row of the local diagonal submatrix, and o_nz
3802    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3803    One way to choose d_nz and o_nz is to use the max nonzeros per local
3804    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3805    In this case, the values of d_nz,o_nz are:
3806 .vb
3807      proc0 : dnz = 2, o_nz = 2
3808      proc1 : dnz = 3, o_nz = 2
3809      proc2 : dnz = 1, o_nz = 4
3810 .ve
3811    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3812    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3813    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3814    34 values.
3815 
3816    When d_nnz, o_nnz parameters are specified, the storage is specified
3817    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3818    In the above case the values for d_nnz,o_nnz are:
3819 .vb
3820      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3821      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3822      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3823 .ve
3824    Here the space allocated is sum of all the above values i.e 34, and
3825    hence pre-allocation is perfect.
3826 
3827    Level: intermediate
3828 
3829 .keywords: matrix, aij, compressed row, sparse, parallel
3830 
3831 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3832           MPIAIJ, MatGetInfo()
3833 @*/
3834 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3835 {
3836   PetscErrorCode ierr;
3837 
3838   PetscFunctionBegin;
3839   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3840   PetscFunctionReturn(0);
3841 }
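
/*
   A minimal, illustrative usage sketch (assumed, not part of the original file): per-row
   preallocation with the d_nnz/o_nnz values listed for proc0 of the 8x8 example in the
   manual page above.  Shown for a single rank owning 3 rows; every rank passes its own
   sizes and arrays.  Guarded out of the build; the function name is hypothetical.
*/
#if 0
static PetscErrorCode PreallocationExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  const PetscInt d_nnz[] = {2,2,2};  /* nonzeros per local row in the DIAGONAL block */
  const PetscInt o_nnz[] = {2,2,2};  /* nonzeros per local row in the OFF-DIAGONAL block */

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,3,3,8,8);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);CHKERRQ(ierr); /* d_nz/o_nz ignored when arrays are given */
  /* ... MatSetValues() the entries, then MatAssemblyBegin()/MatAssemblyEnd() ... */
  PetscFunctionReturn(0);
}
#endif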
3842 
3843 #undef __FUNCT__
3844 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3845 /*@
3846      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3847          CSR format the local rows.
3848 
3849    Collective on MPI_Comm
3850 
3851    Input Parameters:
3852 +  comm - MPI communicator
3853 .  m - number of local rows (Cannot be PETSC_DECIDE)
3854 .  n - This value should be the same as the local size used in creating the
3855        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3856        calculated if N is given) For square matrices n is almost always m.
3857 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3858 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3859 .   i - row indices
3860 .   j - column indices
3861 -   a - matrix values
3862 
3863    Output Parameter:
3864 .   mat - the matrix
3865 
3866    Level: intermediate
3867 
3868    Notes:
3869        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3870      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3871      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3872 
3873        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3874 
3875        The format used for the sparse matrix input is equivalent to a
3876     row-major ordering, i.e., for the following matrix the input data expected is
3877     as shown:
3878 
3879         1 0 0
3880         2 0 3     P0
3881        -------
3882         4 5 6     P1
3883 
3884      Process0 [P0]: rows_owned=[0,1]
3885         i =  {0,1,3}  [size = nrow+1  = 2+1]
3886         j =  {0,0,2}  [size = nz = 6]
3887         v =  {1,2,3}  [size = nz = 6]
3888 
3889      Process1 [P1]: rows_owned=[2]
3890         i =  {0,3}    [size = nrow+1  = 1+1]
3891         j =  {0,1,2}  [size = nz = 6]
3892         v =  {4,5,6}  [size = nz = 6]
3893 
3894 .keywords: matrix, aij, compressed row, sparse, parallel
3895 
3896 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3897           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3898 @*/
3899 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3900 {
3901   PetscErrorCode ierr;
3902 
3903   PetscFunctionBegin;
3904   if (i[0]) {
3905     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3906   }
3907   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3908   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3909   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3910   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3911   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3912   PetscFunctionReturn(0);
3913 }
3914 
3915 #undef __FUNCT__
3916 #define __FUNCT__ "MatCreateMPIAIJ"
3917 /*@C
3918    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3919    (the default parallel PETSc format).  For good matrix assembly performance
3920    the user should preallocate the matrix storage by setting the parameters
3921    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3922    performance can be increased by more than a factor of 50.
3923 
3924    Collective on MPI_Comm
3925 
3926    Input Parameters:
3927 +  comm - MPI communicator
3928 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3929            This value should be the same as the local size used in creating the
3930            y vector for the matrix-vector product y = Ax.
3931 .  n - This value should be the same as the local size used in creating the
3932        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3933        calculated if N is given) For square matrices n is almost always m.
3934 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3935 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3936 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3937            (same value is used for all local rows)
3938 .  d_nnz - array containing the number of nonzeros in the various rows of the
3939            DIAGONAL portion of the local submatrix (possibly different for each row)
3940            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3941            The size of this array is equal to the number of local rows, i.e 'm'.
3942            You must leave room for the diagonal entry even if it is zero.
3943 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3944            submatrix (same value is used for all local rows).
3945 -  o_nnz - array containing the number of nonzeros in the various rows of the
3946            OFF-DIAGONAL portion of the local submatrix (possibly different for
3947            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3948            structure. The size of this array is equal to the number
3949            of local rows, i.e 'm'.
3950 
3951    Output Parameter:
3952 .  A - the matrix
3953 
3954    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3955    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3956    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3957 
3958    Notes:
3959    If the *_nnz parameter is given then the *_nz parameter is ignored
3960 
3961    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3962    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3963    storage requirements for this matrix.
3964 
3965    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3966    processor than it must be used on all processors that share the object for
3967    that argument.
3968 
3969    The user MUST specify either the local or global matrix dimensions
3970    (possibly both).
3971 
3972    The parallel matrix is partitioned across processors such that the
3973    first m0 rows belong to process 0, the next m1 rows belong to
3974    process 1, the next m2 rows belong to process 2 etc.. where
3975    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3976    values corresponding to an [m x N] submatrix.
3977 
3978    The columns are logically partitioned with the n0 columns belonging
3979    to 0th partition, the next n1 columns belonging to the next
3980    partition, etc., where n0,n1,n2... are the input parameter 'n'.
3981 
3982    The DIAGONAL portion of the local submatrix on any given processor
3983    is the submatrix corresponding to the rows and columns m,n
3984    corresponding to the given processor. i.e diagonal matrix on
3985    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3986    etc. The remaining portion of the local submatrix [m x (N-n)]
3987    constitutes the OFF-DIAGONAL portion. The example below better
3988    illustrates this concept.
3989 
3990    For a square global matrix we define each processor's diagonal portion
3991    to be its local rows and the corresponding columns (a square submatrix);
3992    each processor's off-diagonal portion encompasses the remainder of the
3993    local matrix (a rectangular submatrix).
3994 
3995    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3996 
3997    When calling this routine with a single process communicator, a matrix of
3998    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3999    type of communicator, use the construction mechanism:
4000      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4001 
4002    By default, this format uses inodes (identical nodes) when possible.
4003    We search for consecutive rows with the same nonzero structure, thereby
4004    reusing matrix information to achieve increased efficiency.
4005 
4006    Options Database Keys:
4007 +  -mat_no_inode  - Do not use inodes
4008 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4009 -  -mat_aij_oneindex - Internally use indexing starting at 1
4010         rather than 0.  Note that when calling MatSetValues(),
4011         the user still MUST index entries starting at 0!
4012 
4013 
4014    Example usage:
4015 
4016    Consider the following 8x8 matrix with 34 non-zero values that is
4017    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4018    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4019    as follows:
4020 
4021 .vb
4022             1  2  0  |  0  3  0  |  0  4
4023     Proc0   0  5  6  |  7  0  0  |  8  0
4024             9  0 10  | 11  0  0  | 12  0
4025     -------------------------------------
4026            13  0 14  | 15 16 17  |  0  0
4027     Proc1   0 18  0  | 19 20 21  |  0  0
4028             0  0  0  | 22 23  0  | 24  0
4029     -------------------------------------
4030     Proc2  25 26 27  |  0  0 28  | 29  0
4031            30  0  0  | 31 32 33  |  0 34
4032 .ve
4033 
4034    This can be represented as a collection of submatrices as:
4035 
4036 .vb
4037       A B C
4038       D E F
4039       G H I
4040 .ve
4041 
4042    The submatrices A,B,C are owned by proc0, D,E,F are
4043    owned by proc1, and G,H,I are owned by proc2.
4044 
4045    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4046    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4047    The 'M','N' parameters are 8,8, and have the same values on all procs.
4048 
4049    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4050    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4051    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4052    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4053    part as separate SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4054    matrix, and [DF] as another SeqAIJ matrix.
4055 
4056    When d_nz, o_nz parameters are specified, d_nz storage elements are
4057    allocated for every row of the local diagonal submatrix, and o_nz
4058    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4059    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4060    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4061    In this case, the values of d_nz,o_nz are:
4062 .vb
4063      proc0 : d_nz = 2, o_nz = 2
4064      proc1 : d_nz = 3, o_nz = 2
4065      proc2 : d_nz = 1, o_nz = 4
4066 .ve
4067    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4068    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4069    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4070    34 values.
4071 
4072    When d_nnz, o_nnz parameters are specified, the storage is specified
4073    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4074    In the above case the values for d_nnz,o_nnz are:
4075 .vb
4076      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4077      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4078      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4079 .ve
4080    Here the space allocated is the sum of all the above values, i.e. 34, and
4081    hence the preallocation is perfect.
4082 
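   For example, a minimal sketch (error checking omitted) of the call that proc1
   would make with the d_nnz and o_nnz values above is:
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat      A;
     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
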
4083    Level: intermediate
4084 
4085 .keywords: matrix, aij, compressed row, sparse, parallel
4086 
4087 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4088           MPIAIJ, MatCreateMPIAIJWithArrays()
4089 @*/
4090 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4091 {
4092   PetscErrorCode ierr;
4093   PetscMPIInt    size;
4094 
4095   PetscFunctionBegin;
4096   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4097   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4098   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4099   if (size > 1) {
4100     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4101     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4102   } else {
4103     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4104     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4105   }
4106   PetscFunctionReturn(0);
4107 }
4108 
4109 #undef __FUNCT__
4110 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4111 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4112 {
4113   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4114 
4115   PetscFunctionBegin;
4116   *Ad     = a->A;
4117   *Ao     = a->B;
4118   *colmap = a->garray;
4119   PetscFunctionReturn(0);
4120 }
4121 
4122 #undef __FUNCT__
4123 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4124 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4125 {
4126   PetscErrorCode ierr;
4127   PetscInt       i;
4128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4129 
4130   PetscFunctionBegin;
4131   if (coloring->ctype == IS_COLORING_GLOBAL) {
4132     ISColoringValue *allcolors,*colors;
4133     ISColoring      ocoloring;
4134 
4135     /* set coloring for diagonal portion */
4136     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4137 
4138     /* set coloring for off-diagonal portion */
4139     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4140     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4141     for (i=0; i<a->B->cmap->n; i++) {
4142       colors[i] = allcolors[a->garray[i]];
4143     }
4144     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4145     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4146     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4147     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4148   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4149     ISColoringValue *colors;
4150     PetscInt        *larray;
4151     ISColoring      ocoloring;
4152 
4153     /* set coloring for diagonal portion */
4154     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4155     for (i=0; i<a->A->cmap->n; i++) {
4156       larray[i] = i + A->cmap->rstart;
4157     }
4158     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4159     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4160     for (i=0; i<a->A->cmap->n; i++) {
4161       colors[i] = coloring->colors[larray[i]];
4162     }
4163     ierr = PetscFree(larray);CHKERRQ(ierr);
4164     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4165     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4166     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4167 
4168     /* set coloring for off-diagonal portion */
4169     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4170     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4171     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4172     for (i=0; i<a->B->cmap->n; i++) {
4173       colors[i] = coloring->colors[larray[i]];
4174     }
4175     ierr = PetscFree(larray);CHKERRQ(ierr);
4176     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4177     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4178     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4179   } else {
4180     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4181   }
4182 
4183   PetscFunctionReturn(0);
4184 }
4185 
4186 #if defined(PETSC_HAVE_ADIC)
4187 #undef __FUNCT__
4188 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4189 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4190 {
4191   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4192   PetscErrorCode ierr;
4193 
4194   PetscFunctionBegin;
4195   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4196   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4197   PetscFunctionReturn(0);
4198 }
4199 #endif
4200 
4201 #undef __FUNCT__
4202 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4203 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4204 {
4205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4206   PetscErrorCode ierr;
4207 
4208   PetscFunctionBegin;
4209   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4210   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4211   PetscFunctionReturn(0);
4212 }
4213 
4214 #undef __FUNCT__
4215 #define __FUNCT__ "MatMerge"
4216 /*@
4217       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4218                  matrices from each processor
4219 
4220     Collective on MPI_Comm
4221 
4222    Input Parameters:
4223 +    comm - the communicator the parallel matrix will live on
4224 .    inmat - the input sequential matrix (each process passes its own)
4225 .    n - number of local columns (or PETSC_DECIDE)
4226 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4227 
4228    Output Parameter:
4229 .    outmat - the parallel matrix generated
4230 
4231     Level: advanced
4232 
4233    Notes: The number of columns of the matrix on EACH process MUST be the same. The input matrix inmat is destroyed by this routine.
4234 
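   A minimal usage sketch (error checking omitted); mlocal, N and nnz are
   placeholders for the local row count, the global column count and the per-row
   nonzero counts of the local piece:
.vb
     Mat local,global;
     MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,nnz,&local);
     /* ... set values in local and assemble it ... */
     MatMerge(PETSC_COMM_WORLD,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);
.ve
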
4235 @*/
4236 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4237 {
4238   PetscErrorCode ierr;
4239   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4240   PetscInt       *indx;
4241   PetscScalar    *values;
4242 
4243   PetscFunctionBegin;
4244   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4245   if (scall == MAT_INITIAL_MATRIX){
4246     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4247     if (n == PETSC_DECIDE){
4248       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4249     }
4250     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4251     rstart -= m;
4252 
4253     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4254     for (i=0;i<m;i++) {
4255       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4256       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4257       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4258     }
4259     /* This routine will ONLY return MPIAIJ type matrix */
4260     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4261     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4262     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4263     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4264     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4265 
4266   } else if (scall == MAT_REUSE_MATRIX){
4267     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4268   } else {
4269     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4270   }
4271 
4272   for (i=0;i<m;i++) {
4273     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4274     Ii    = i + rstart;
4275     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4276     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4277   }
4278   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4279   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4280   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4281 
4282   PetscFunctionReturn(0);
4283 }
4284 
4285 #undef __FUNCT__
4286 #define __FUNCT__ "MatFileSplit"
4287 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4288 {
4289   PetscErrorCode    ierr;
4290   PetscMPIInt       rank;
4291   PetscInt          m,N,i,rstart,nnz;
4292   size_t            len;
4293   const PetscInt    *indx;
4294   PetscViewer       out;
4295   char              *name;
4296   Mat               B;
4297   const PetscScalar *values;
4298 
4299   PetscFunctionBegin;
4300   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4301   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4302   /* Should this be the type of the diagonal block of A? */
4303   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4304   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4305   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4306   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4307   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4308   for (i=0;i<m;i++) {
4309     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4310     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4311     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4312   }
4313   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4314   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4315 
4316   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4317   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4318   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4319   sprintf(name,"%s.%d",outfile,rank);
4320   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4321   ierr = PetscFree(name);CHKERRQ(ierr);
4322   ierr = MatView(B,out);CHKERRQ(ierr);
4323   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4324   ierr = MatDestroy(B);CHKERRQ(ierr);
4325   PetscFunctionReturn(0);
4326 }
4327 
4328 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4329 #undef __FUNCT__
4330 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4331 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4332 {
4333   PetscErrorCode       ierr;
4334   Mat_Merge_SeqsToMPI  *merge = PETSC_NULL; /* remains NULL if no container is attached */
4335   PetscContainer       container;
4336 
4337   PetscFunctionBegin;
4338   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4339   if (container) {
4340     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4341     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4342     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4343     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4344     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4345     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4346     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4347     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4348     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4349     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4350     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4351     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4352     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4353     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4354 
4355     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4356     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4357   }
4358   ierr = PetscFree(merge);CHKERRQ(ierr);
4359 
4360   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4361   PetscFunctionReturn(0);
4362 }
4363 
4364 #include "../src/mat/utils/freespace.h"
4365 #include "petscbt.h"
4366 
4367 #undef __FUNCT__
4368 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4369 /*@C
4370       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
4371                  matrices from each processor
4372 
4373     Collective on MPI_Comm
4374 
4375    Input Parameters:
4376 +    comm - the communicator the parallel matrix will live on
4377 .    seqmat - the input sequential matrix (each process passes its own)
4378 .    m - number of local rows (or PETSC_DECIDE)
4379 .    n - number of local columns (or PETSC_DECIDE)
4380 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4381 
4382    Output Parameter:
4383 .    mpimat - the parallel matrix generated
4384 
4385     Level: advanced
4386 
4387    Notes:
4388      The dimensions of the sequential matrix on each process MUST be the same.
4389      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4390      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
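
     A minimal calling sketch (error checking omitted): the first call uses
     MAT_INITIAL_MATRIX to build the parallel structure; later calls may use
     MAT_REUSE_MATRIX when only the numerical values of seqmat have changed:
.vb
     MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);
     /* ... change the values (not the nonzero pattern) of seqmat ... */
     MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);
.ve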
4391 @*/
4392 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4393 {
4394   PetscErrorCode       ierr;
4395   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4396   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4397   PetscMPIInt          size,rank,taga,*len_s;
4398   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4399   PetscInt             proc,m;
4400   PetscInt             **buf_ri,**buf_rj;
4401   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4402   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4403   MPI_Request          *s_waits,*r_waits;
4404   MPI_Status           *status;
4405   MatScalar            *aa=a->a;
4406   MatScalar            **abuf_r,*ba_i;
4407   Mat_Merge_SeqsToMPI  *merge;
4408   PetscContainer       container;
4409 
4410   PetscFunctionBegin;
4411   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4412 
4413   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4414   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4415 
4416   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4417   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have an attached MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
4418   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4420   bi     = merge->bi;
4421   bj     = merge->bj;
4422   buf_ri = merge->buf_ri;
4423   buf_rj = merge->buf_rj;
4424 
4425   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4426   owners = merge->rowmap->range;
4427   len_s  = merge->len_s;
4428 
4429   /* send and recv matrix values */
4430   /*-----------------------------*/
4431   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4432   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4433 
4434   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4435   for (proc=0,k=0; proc<size; proc++){
4436     if (!len_s[proc]) continue;
4437     i = owners[proc];
4438     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4439     k++;
4440   }
4441 
4442   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4443   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4444   ierr = PetscFree(status);CHKERRQ(ierr);
4445 
4446   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4447   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4448 
4449   /* insert mat values of mpimat */
4450   /*----------------------------*/
4451   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4452   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4453 
4454   for (k=0; k<merge->nrecv; k++){
4455     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4456     nrows = *(buf_ri_k[k]);
4457     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4458     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4459   }
4460 
4461   /* set values of ba */
4462   m = merge->rowmap->n;
4463   for (i=0; i<m; i++) {
4464     arow = owners[rank] + i;
4465     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4466     bnzi = bi[i+1] - bi[i];
4467     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4468 
4469     /* add local non-zero vals of this proc's seqmat into ba */
4470     anzi = ai[arow+1] - ai[arow];
4471     aj   = a->j + ai[arow];
4472     aa   = a->a + ai[arow];
4473     nextaj = 0;
4474     for (j=0; nextaj<anzi; j++){
4475       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4476         ba_i[j] += aa[nextaj++];
4477       }
4478     }
4479 
4480     /* add received vals into ba */
4481     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4482       /* i-th row */
4483       if (i == *nextrow[k]) {
4484         anzi = *(nextai[k]+1) - *nextai[k];
4485         aj   = buf_rj[k] + *(nextai[k]);
4486         aa   = abuf_r[k] + *(nextai[k]);
4487         nextaj = 0;
4488         for (j=0; nextaj<anzi; j++){
4489           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4490             ba_i[j] += aa[nextaj++];
4491           }
4492         }
4493         nextrow[k]++; nextai[k]++;
4494       }
4495     }
4496     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4497   }
4498   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4499   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4500 
4501   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4502   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4503   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4504   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4505   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4506   PetscFunctionReturn(0);
4507 }
4508 
4509 #undef __FUNCT__
4510 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4511 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4512 {
4513   PetscErrorCode       ierr;
4514   Mat                  B_mpi;
4515   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4516   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4517   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4518   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4519   PetscInt             len,proc,*dnz,*onz;
4520   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4521   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4522   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4523   MPI_Status           *status;
4524   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4525   PetscBT              lnkbt;
4526   Mat_Merge_SeqsToMPI  *merge;
4527   PetscContainer       container;
4528 
4529   PetscFunctionBegin;
4530   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4531 
4532   /* make sure it is a PETSc comm */
4533   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4534   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4535   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4536 
4537   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4538   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4539 
4540   /* determine row ownership */
4541   /*---------------------------------------------------------*/
4542   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4543   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4544   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4545   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4546   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4547   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4548   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4549 
4550   m      = merge->rowmap->n;
4551   M      = merge->rowmap->N;
4552   owners = merge->rowmap->range;
4553 
4554   /* determine the number of messages to send, their lengths */
4555   /*---------------------------------------------------------*/
4556   len_s  = merge->len_s;
4557 
4558   len = 0;  /* length of buf_si[] */
4559   merge->nsend = 0;
4560   for (proc=0; proc<size; proc++){
4561     len_si[proc] = 0;
4562     if (proc == rank){
4563       len_s[proc] = 0;
4564     } else {
4565       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4566       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4567     }
4568     if (len_s[proc]) {
4569       merge->nsend++;
4570       nrows = 0;
4571       for (i=owners[proc]; i<owners[proc+1]; i++){
4572         if (ai[i+1] > ai[i]) nrows++;
4573       }
4574       len_si[proc] = 2*(nrows+1);
4575       len += len_si[proc];
4576     }
4577   }
4578 
4579   /* determine the number and length of messages to receive for ij-structure */
4580   /*-------------------------------------------------------------------------*/
4581   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4582   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4583 
4584   /* post the Irecv of j-structure */
4585   /*-------------------------------*/
4586   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4587   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4588 
4589   /* post the Isend of j-structure */
4590   /*--------------------------------*/
4591   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4592 
4593   for (proc=0, k=0; proc<size; proc++){
4594     if (!len_s[proc]) continue;
4595     i = owners[proc];
4596     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4597     k++;
4598   }
4599 
4600   /* receives and sends of j-structure are complete */
4601   /*------------------------------------------------*/
4602   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4603   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4604 
4605   /* send and recv i-structure */
4606   /*---------------------------*/
4607   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4608   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4609 
4610   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4611   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4612   for (proc=0,k=0; proc<size; proc++){
4613     if (!len_s[proc]) continue;
4614     /* form outgoing message for i-structure:
4615          buf_si[0]:                 nrows to be sent
4616                [1:nrows]:           row index (global)
4617                [nrows+1:2*nrows+1]: i-structure index
4618     */
4619     /*-------------------------------------------*/
4620     nrows = len_si[proc]/2 - 1;
4621     buf_si_i    = buf_si + nrows+1;
4622     buf_si[0]   = nrows;
4623     buf_si_i[0] = 0;
4624     nrows = 0;
4625     for (i=owners[proc]; i<owners[proc+1]; i++){
4626       anzi = ai[i+1] - ai[i];
4627       if (anzi) {
4628         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4629         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4630         nrows++;
4631       }
4632     }
4633     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4634     k++;
4635     buf_si += len_si[proc];
4636   }
4637 
4638   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4639   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4640 
4641   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4642   for (i=0; i<merge->nrecv; i++){
4643     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4644   }
4645 
4646   ierr = PetscFree(len_si);CHKERRQ(ierr);
4647   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4648   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4649   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4650   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4651   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4652   ierr = PetscFree(status);CHKERRQ(ierr);
4653 
4654   /* compute a local seq matrix in each processor */
4655   /*----------------------------------------------*/
4656   /* allocate bi array and free space for accumulating nonzero column info */
4657   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4658   bi[0] = 0;
4659 
4660   /* create and initialize a linked list */
4661   nlnk = N+1;
4662   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4663 
4664   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4665   len  = ai[owners[rank+1]] - ai[owners[rank]];
4667   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4668   current_space = free_space;
4669 
4670   /* determine symbolic info for each local row */
4671   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4672 
4673   for (k=0; k<merge->nrecv; k++){
4674     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4675     nrows = *buf_ri_k[k];
4676     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4677     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4678   }
4679 
4680   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4681   len = 0;
4682   for (i=0;i<m;i++) {
4683     bnzi   = 0;
4684     /* add local non-zero cols of this proc's seqmat into lnk */
4685     arow   = owners[rank] + i;
4686     anzi   = ai[arow+1] - ai[arow];
4687     aj     = a->j + ai[arow];
4688     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4689     bnzi += nlnk;
4690     /* add received col data into lnk */
4691     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4692       if (i == *nextrow[k]) { /* i-th row */
4693         anzi = *(nextai[k]+1) - *nextai[k];
4694         aj   = buf_rj[k] + *nextai[k];
4695         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4696         bnzi += nlnk;
4697         nextrow[k]++; nextai[k]++;
4698       }
4699     }
4700     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4701 
4702     /* if free space is not available, make more free space */
4703     if (current_space->local_remaining<bnzi) {
4704       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4705       nspacedouble++;
4706     }
4707     /* copy data into free space, then initialize lnk */
4708     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4709     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4710 
4711     current_space->array           += bnzi;
4712     current_space->local_used      += bnzi;
4713     current_space->local_remaining -= bnzi;
4714 
4715     bi[i+1] = bi[i] + bnzi;
4716   }
4717 
4718   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4719 
4720   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4721   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4722   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4723 
4724   /* create symbolic parallel matrix B_mpi */
4725   /*---------------------------------------*/
4726   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4727   if (n==PETSC_DECIDE) {
4728     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4729   } else {
4730     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4731   }
4732   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4733   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4734   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4735 
4736   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4737   B_mpi->assembled     = PETSC_FALSE;
4738   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4739   merge->bi            = bi;
4740   merge->bj            = bj;
4741   merge->buf_ri        = buf_ri;
4742   merge->buf_rj        = buf_rj;
4743   merge->coi           = PETSC_NULL;
4744   merge->coj           = PETSC_NULL;
4745   merge->owners_co     = PETSC_NULL;
4746 
4747   /* attach the supporting struct to B_mpi for reuse */
4748   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4749   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4750   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4751   *mpimat = B_mpi;
4752 
4753   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4754   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4755   PetscFunctionReturn(0);
4756 }
4757 
4758 #undef __FUNCT__
4759 #define __FUNCT__ "MatMerge_SeqsToMPI"
4760 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4761 {
4762   PetscErrorCode   ierr;
4763 
4764   PetscFunctionBegin;
4765   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4766   if (scall == MAT_INITIAL_MATRIX){
4767     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4768   }
4769   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4770   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4771   PetscFunctionReturn(0);
4772 }
4773 
4774 #undef __FUNCT__
4775 #define __FUNCT__ "MatGetLocalMat"
4776 /*@
4777      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel (MPIAIJ) matrix
4778 
4779     Not Collective
4780 
4781    Input Parameters:
4782 +    A - the matrix
4783 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4784 
4785    Output Parameter:
4786 .    A_loc - the local sequential matrix generated
4787 
4788     Level: developer
4789 
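   A minimal usage sketch (error checking omitted); the second call reuses the
   matrix created by the first once only the numerical values of A have changed:
.vb
     Mat A_loc;
     MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... the values of A change, same nonzero pattern ... */
     MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
.ve
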
4790 @*/
4791 PetscErrorCode  MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4792 {
4793   PetscErrorCode  ierr;
4794   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4795   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4796   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4797   MatScalar       *aa=a->a,*ba=b->a,*cam;
4798   PetscScalar     *ca;
4799   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4800   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4801 
4802   PetscFunctionBegin;
4803   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4804   if (scall == MAT_INITIAL_MATRIX){
4805     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4806     ci[0] = 0;
4807     for (i=0; i<am; i++){
4808       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4809     }
4810     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4811     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4812     k = 0;
4813     for (i=0; i<am; i++) {
4814       ncols_o = bi[i+1] - bi[i];
4815       ncols_d = ai[i+1] - ai[i];
4816       /* off-diagonal portion of A: columns to the left of the diagonal block */
4817       for (jo=0; jo<ncols_o; jo++) {
4818         col = cmap[*bj];
4819         if (col >= cstart) break;
4820         cj[k]   = col; bj++;
4821         ca[k++] = *ba++;
4822       }
4823       /* diagonal portion of A */
4824       for (j=0; j<ncols_d; j++) {
4825         cj[k]   = cstart + *aj++;
4826         ca[k++] = *aa++;
4827       }
4828       /* off-diagonal portion of A: columns to the right of the diagonal block */
4829       for (j=jo; j<ncols_o; j++) {
4830         cj[k]   = cmap[*bj++];
4831         ca[k++] = *ba++;
4832       }
4833     }
4834     /* put together the new matrix */
4835     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4836     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4837     /* Since these are PETSc arrays, change flags to free them as necessary. */
4838     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4839     mat->free_a  = PETSC_TRUE;
4840     mat->free_ij = PETSC_TRUE;
4841     mat->nonew   = 0;
4842   } else if (scall == MAT_REUSE_MATRIX){
4843     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4844     ci = mat->i; cj = mat->j; cam = mat->a;
4845     for (i=0; i<am; i++) {
4846       /* off-diagonal portion of A */
4847       ncols_o = bi[i+1] - bi[i];
4848       for (jo=0; jo<ncols_o; jo++) {
4849         col = cmap[*bj];
4850         if (col >= cstart) break;
4851         *cam++ = *ba++; bj++;
4852       }
4853       /* diagonal portion of A */
4854       ncols_d = ai[i+1] - ai[i];
4855       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4856       /* off-diagonal portion of A */
4857       for (j=jo; j<ncols_o; j++) {
4858         *cam++ = *ba++; bj++;
4859       }
4860     }
4861   } else {
4862     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4863   }
4864 
4865   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4866   PetscFunctionReturn(0);
4867 }
4868 
4869 #undef __FUNCT__
4870 #define __FUNCT__ "MatGetLocalMatCondensed"
4871 /*@C
4872      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and the NON-ZERO columns of a parallel (MPIAIJ) matrix
4873 
4874     Not Collective
4875 
4876    Input Parameters:
4877 +    A - the matrix
4878 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4879 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4880 
4881    Output Parameter:
4882 .    A_loc - the local sequential matrix generated
4883 
4884     Level: developer
4885 
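   A minimal usage sketch (error checking omitted); passing PETSC_NULL for row and
   col lets the routine select all local rows and the nonzero columns itself:
.vb
     Mat A_loc;
     MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
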
4886 @*/
4887 PetscErrorCode  MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4888 {
4889   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4890   PetscErrorCode    ierr;
4891   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4892   IS                isrowa,iscola;
4893   Mat               *aloc;
4894 
4895   PetscFunctionBegin;
4896   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4897   if (!row){
4898     start = A->rmap->rstart; end = A->rmap->rend;
4899     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4900   } else {
4901     isrowa = *row;
4902   }
4903   if (!col){
4904     start = A->cmap->rstart;
4905     cmap  = a->garray;
4906     nzA   = a->A->cmap->n;
4907     nzB   = a->B->cmap->n;
4908     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4909     ncols = 0;
4910     for (i=0; i<nzB; i++) {
4911       if (cmap[i] < start) idx[ncols++] = cmap[i];
4912       else break;
4913     }
4914     imark = i;
4915     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4916     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4917     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4918   } else {
4919     iscola = *col;
4920   }
4921   if (scall != MAT_INITIAL_MATRIX){
4922     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4923     aloc[0] = *A_loc;
4924   }
4925   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4926   *A_loc = aloc[0];
4927   ierr = PetscFree(aloc);CHKERRQ(ierr);
4928   if (!row){
4929     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4930   }
4931   if (!col){
4932     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4933   }
4934   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4935   PetscFunctionReturn(0);
4936 }
4937 
4938 #undef __FUNCT__
4939 #define __FUNCT__ "MatGetBrowsOfAcols"
4940 /*@C
4941     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4942 
4943     Collective on Mat
4944 
4945    Input Parameters:
4946 +    A,B - the matrices in mpiaij format
4947 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4948 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4949 
4950    Output Parameter:
4951 +    rowb, colb - index sets of rows and columns of B to extract
4952 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4953 -    B_seq - the sequential matrix generated
4954 
4955     Level: developer
4956 
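   A minimal usage sketch (error checking omitted); the index sets and brstart from
   the first call are kept so that a later call can reuse the extracted matrix:
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
     /* ... the values of B change, same nonzero pattern ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);
.ve
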
4957 @*/
4958 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4959 {
4960   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4961   PetscErrorCode    ierr;
4962   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4963   IS                isrowb,iscolb;
4964   Mat               *bseq;
4965 
4966   PetscFunctionBegin;
4967   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4968     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4969   }
4970   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4971 
4972   if (scall == MAT_INITIAL_MATRIX){
4973     start = A->cmap->rstart;
4974     cmap  = a->garray;
4975     nzA   = a->A->cmap->n;
4976     nzB   = a->B->cmap->n;
4977     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4978     ncols = 0;
4979     for (i=0; i<nzB; i++) {  /* row < local row index */
4980       if (cmap[i] < start) idx[ncols++] = cmap[i];
4981       else break;
4982     }
4983     imark = i;
4984     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4985     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4986     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4987     *brstart = imark;
4988     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4989   } else {
4990     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4991     isrowb = *rowb; iscolb = *colb;
4992     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4993     bseq[0] = *B_seq;
4994   }
4995   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4996   *B_seq = bseq[0];
4997   ierr = PetscFree(bseq);CHKERRQ(ierr);
4998   if (!rowb){
4999     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
5000   } else {
5001     *rowb = isrowb;
5002   }
5003   if (!colb){
5004     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
5005   } else {
5006     *colb = iscolb;
5007   }
5008   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5009   PetscFunctionReturn(0);
5010 }
5011 
5012 #undef __FUNCT__
5013 #define __FUNCT__ "MatGetBrowsOfAoCols"
5014 /*@C
5015     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5016     of the OFF-DIAGONAL portion of local A
5017 
5018     Collective on Mat
5019 
5020    Input Parameters:
5021 +    A,B - the matrices in mpiaij format
5022 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5023 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5024 .    startsj_r - similar to startsj for receives
5025 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5026 
5027    Output Parameter:
5028 .    B_oth - the sequential matrix generated
5029 
5030     Level: developer
5031 
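   A minimal usage sketch (error checking omitted); the startsj, startsj_r and bufa
   buffers returned by the first call are kept so a later call can reuse B_oth:
.vb
     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;
     MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);
     /* ... the values of B change, same nonzero pattern ... */
     MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);
.ve
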
5032 @*/
5033 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5034 {
5035   VecScatter_MPI_General *gen_to,*gen_from;
5036   PetscErrorCode         ierr;
5037   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5038   Mat_SeqAIJ             *b_oth;
5039   VecScatter             ctx=a->Mvctx;
5040   MPI_Comm               comm=((PetscObject)ctx)->comm;
5041   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5042   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5043   PetscScalar            *rvalues,*svalues;
5044   MatScalar              *b_otha,*bufa,*bufA;
5045   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5046   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5047   MPI_Status             *sstatus,rstatus;
5048   PetscMPIInt            jj;
5049   PetscInt               *cols,sbs,rbs;
5050   PetscScalar            *vals;
5051 
5052   PetscFunctionBegin;
5053   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5054     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5055   }
5056   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5057   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5058 
5059   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5060   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5061   rvalues  = gen_from->values; /* holds the length of receiving row */
5062   svalues  = gen_to->values;   /* holds the length of sending row */
5063   nrecvs   = gen_from->n;
5064   nsends   = gen_to->n;
5065 
5066   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5067   srow     = gen_to->indices;   /* local row index to be sent */
5068   sstarts  = gen_to->starts;
5069   sprocs   = gen_to->procs;
5070   sstatus  = gen_to->sstatus;
5071   sbs      = gen_to->bs;
5072   rstarts  = gen_from->starts;
5073   rprocs   = gen_from->procs;
5074   rbs      = gen_from->bs;
5075 
5076   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5077   if (scall == MAT_INITIAL_MATRIX){
5078     /* i-array */
5079     /*---------*/
5080     /*  post receives */
5081     for (i=0; i<nrecvs; i++){
5082       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5083       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5084       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5085     }
5086 
5087     /* pack the outgoing message */
5088     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5089     sstartsj[0] = 0;  rstartsj[0] = 0;
5090     len = 0; /* total length of j or a array to be sent */
5091     k = 0;
5092     for (i=0; i<nsends; i++){
5093       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5094       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5095       for (j=0; j<nrows; j++) {
5096         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5097         for (l=0; l<sbs; l++){
5098           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5099           rowlen[j*sbs+l] = ncols;
5100           len += ncols;
5101           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5102         }
5103         k++;
5104       }
5105       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5106       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5107     }
5108     /* recvs and sends of i-array are completed */
5109     i = nrecvs;
5110     while (i--) {
5111       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5112     }
5113     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5114 
5115     /* allocate buffers for sending j and a arrays */
5116     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5117     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5118 
5119     /* create i-array of B_oth */
5120     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5121     b_othi[0] = 0;
5122     len = 0; /* total length of j or a array to be received */
5123     k = 0;
5124     for (i=0; i<nrecvs; i++){
5125       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5126       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5127       for (j=0; j<nrows; j++) {
5128         b_othi[k+1] = b_othi[k] + rowlen[j];
5129         len += rowlen[j]; k++;
5130       }
5131       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5132     }
5133 
5134     /* allocate space for j and a arrays of B_oth */
5135     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5136     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5137 
5138     /* j-array */
5139     /*---------*/
5140     /*  post receives of j-array */
5141     for (i=0; i<nrecvs; i++){
5142       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5143       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5144     }
5145 
5146     /* pack the outgoing message j-array */
5147     k = 0;
5148     for (i=0; i<nsends; i++){
5149       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5150       bufJ = bufj+sstartsj[i];
5151       for (j=0; j<nrows; j++) {
5152         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5153         for (ll=0; ll<sbs; ll++){
5154           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5155           for (l=0; l<ncols; l++){
5156             *bufJ++ = cols[l];
5157           }
5158           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5159         }
5160       }
5161       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5162     }
5163 
5164     /* recvs and sends of j-array are completed */
5165     i = nrecvs;
5166     while (i--) {
5167       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5168     }
5169     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5170   } else if (scall == MAT_REUSE_MATRIX){
5171     sstartsj = *startsj;
5172     rstartsj = *startsj_r;
5173     bufa     = *bufa_ptr;
5174     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5175     b_otha   = b_oth->a;
5176   } else {
5177     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5178   }
5179 
5180   /* a-array */
5181   /*---------*/
5182   /*  post receives of a-array */
5183   for (i=0; i<nrecvs; i++){
5184     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5185     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5186   }
5187 
5188   /* pack the outgoing message a-array */
5189   k = 0;
5190   for (i=0; i<nsends; i++){
5191     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5192     bufA = bufa+sstartsj[i];
5193     for (j=0; j<nrows; j++) {
5194       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5195       for (ll=0; ll<sbs; ll++){
5196         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5197         for (l=0; l<ncols; l++){
5198           *bufA++ = vals[l];
5199         }
5200         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5201       }
5202     }
5203     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5204   }
5205   /* recvs and sends of a-array are completed */
5206   i = nrecvs;
5207   while (i--) {
5208     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5209   }
5210   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5211   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5212 
5213   if (scall == MAT_INITIAL_MATRIX){
5214     /* put together the new matrix */
5215     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5216 
5217     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5218     /* Since these are PETSc arrays, change flags to free them as necessary. */
5219     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5220     b_oth->free_a  = PETSC_TRUE;
5221     b_oth->free_ij = PETSC_TRUE;
5222     b_oth->nonew   = 0;
5223 
5224     ierr = PetscFree(bufj);CHKERRQ(ierr);
5225     if (!startsj || !bufa_ptr){
5226       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5227       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5228     } else {
5229       *startsj   = sstartsj;
5230       *startsj_r = rstartsj;
5231       *bufa_ptr  = bufa;
5232     }
5233   }
5234   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5235   PetscFunctionReturn(0);
5236 }
5237 
5238 #undef __FUNCT__
5239 #define __FUNCT__ "MatGetCommunicationStructs"
5240 /*@C
5241   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5242 
5243   Not Collective
5244 
5245   Input Parameters:
5246 . A - The matrix in mpiaij format
5247 
5248   Output Parameters:
5249 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5250 . colmap - A map from global column index to local index into lvec
5251 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5252 
5253   Level: developer
5254 
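  A minimal usage sketch (error checking omitted); the type of colmap depends on
  PETSC_USE_CTABLE, matching the prototype below:
.vb
     Vec        lvec;
     VecScatter scat;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     MatGetCommunicationStructs(A,&lvec,&colmap,&scat);
.ve
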
5255 @*/
5256 #if defined (PETSC_USE_CTABLE)
5257 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5258 #else
5259 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5260 #endif
5261 {
5262   Mat_MPIAIJ *a;
5263 
5264   PetscFunctionBegin;
5265   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5266   PetscValidPointer(lvec, 2);
5267   PetscValidPointer(colmap, 3);
5268   PetscValidPointer(multScatter, 4);
5269   a = (Mat_MPIAIJ *) A->data;
5270   if (lvec) *lvec = a->lvec;
5271   if (colmap) *colmap = a->colmap;
5272   if (multScatter) *multScatter = a->Mvctx;
5273   PetscFunctionReturn(0);
5274 }
5275 
5276 EXTERN_C_BEGIN
5277 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5278 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5279 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5280 EXTERN_C_END
5281 
5282 #undef __FUNCT__
5283 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5284 /*
5285     Computes C = A*B as (B'*A')', since computing the product directly with a dense A and sparse B is untenable
5286 
5287                n                       p                          p
5288         (              )       (              )         (                  )
5289       m (      A       )  *  n (       B      )   =   m (         C        )
5290         (              )       (              )         (                  )
5291 
5292 */
5293 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5294 {
5295   PetscErrorCode     ierr;
5296   Mat                At,Bt,Ct;
5297 
5298   PetscFunctionBegin;
5299   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5300   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5301   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5302   ierr = MatDestroy(At);CHKERRQ(ierr);
5303   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5304   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5305   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5306   PetscFunctionReturn(0);
5307 }
5308 
5309 #undef __FUNCT__
5310 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5311 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5312 {
5313   PetscErrorCode ierr;
5314   PetscInt       m=A->rmap->n,n=B->cmap->n;
5315   Mat            Cmat;
5316 
5317   PetscFunctionBegin;
5318   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5319   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5320   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5321   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5322   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5323   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5324   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5325   *C   = Cmat;
5326   PetscFunctionReturn(0);
5327 }
5328 
5329 /* ----------------------------------------------------------------*/
5330 #undef __FUNCT__
5331 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5332 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5333 {
5334   PetscErrorCode ierr;
5335 
5336   PetscFunctionBegin;
5337   if (scall == MAT_INITIAL_MATRIX){
5338     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5339   }
5340   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5341   PetscFunctionReturn(0);
5342 }
5343 
5344 EXTERN_C_BEGIN
5345 #if defined(PETSC_HAVE_MUMPS)
5346 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5347 #endif
5348 #if defined(PETSC_HAVE_PASTIX)
5349 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5350 #endif
5351 #if defined(PETSC_HAVE_SUPERLU_DIST)
5352 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5353 #endif
5354 #if defined(PETSC_HAVE_SPOOLES)
5355 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5356 #endif
5357 EXTERN_C_END
5358 
5359 /*MC
5360    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5361 
5362    Options Database Keys:
5363 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5364 
5365   Level: beginner
5366 
5367 .seealso: MatCreateMPIAIJ()
5368 M*/
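/*
   A minimal usage sketch for this type (hedged: comm, the local sizes m and n,
   ierr, and the preallocation counts below are illustrative assumptions, not
   values defined in this file).  MatSetType() below can be replaced by
   MatSetFromOptions() together with the -mat_type mpiaij option:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/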
5369 
5370 EXTERN_C_BEGIN
5371 #undef __FUNCT__
5372 #define __FUNCT__ "MatCreate_MPIAIJ"
5373 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5374 {
5375   Mat_MPIAIJ     *b;
5376   PetscErrorCode ierr;
5377   PetscMPIInt    size;
5378 
5379   PetscFunctionBegin;
5380   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5381 
5382   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5383   B->data         = (void*)b;
5384   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5385   B->rmap->bs     = 1;
5386   B->assembled    = PETSC_FALSE;
5387 
5388   B->insertmode   = NOT_SET_VALUES;
5389   b->size         = size;
5390   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5391 
5392   /* build cache (stash) for off-processor entries generated during MatSetValues() */
5393   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5394   b->donotstash  = PETSC_FALSE;
5395   b->colmap      = 0;
5396   b->garray      = 0;
5397   b->roworiented = PETSC_TRUE;
5398 
5399   /* stuff used for matrix vector multiply */
5400   b->lvec      = PETSC_NULL;
5401   b->Mvctx     = PETSC_NULL;
5402 
5403   /* stuff for MatGetRow() */
5404   b->rowindices   = 0;
5405   b->rowvalues    = 0;
5406   b->getrowactive = PETSC_FALSE;
5407 
5408 #if defined(PETSC_HAVE_SPOOLES)
5409   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5410                                      "MatGetFactor_mpiaij_spooles",
5411                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5412 #endif
5413 #if defined(PETSC_HAVE_MUMPS)
5414   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5415                                      "MatGetFactor_aij_mumps",
5416                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5417 #endif
5418 #if defined(PETSC_HAVE_PASTIX)
5419   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5420                                      "MatGetFactor_mpiaij_pastix",
5421                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5422 #endif
5423 #if defined(PETSC_HAVE_SUPERLU_DIST)
5424   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5425                                      "MatGetFactor_mpiaij_superlu_dist",
5426                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5427 #endif
5428   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5429                                      "MatStoreValues_MPIAIJ",
5430                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5431   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5432                                      "MatRetrieveValues_MPIAIJ",
5433                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5434   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5435                                      "MatGetDiagonalBlock_MPIAIJ",
5436                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5437   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5438                                      "MatIsTranspose_MPIAIJ",
5439                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5440   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5441                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5442                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5443   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5444                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5445                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5446   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5447                                      "MatDiagonalScaleLocal_MPIAIJ",
5448                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5449   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5450                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5451                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5452   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5453                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5454                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5455   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5456                                      "MatConvert_MPIAIJ_MPISBAIJ",
5457                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5458   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5459                                      "MatMatMult_MPIDense_MPIAIJ",
5460                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5461   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5462                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5463                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5464   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5465                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5466                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5467   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5468   PetscFunctionReturn(0);
5469 }
5470 EXTERN_C_END
5471 
5472 #undef __FUNCT__
5473 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5474 /*@
5475      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5476          and "off-diagonal" parts of the matrix in CSR format.
5477 
5478    Collective on MPI_Comm
5479 
5480    Input Parameters:
5481 +  comm - MPI communicator
5482 .  m - number of local rows (Cannot be PETSC_DECIDE)
5483 .  n - number of local columns. This value should be the same as the local size used in creating the
5484        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5485        calculated if N is given). For square matrices n is almost always m.
5486 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5487 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5488 .   i - row indices for "diagonal" portion of matrix
5489 .   j - column indices for "diagonal" portion of matrix
5490 .   a - matrix values for "diagonal" portion of matrix
5491 .   oi - row indices for "off-diagonal" portion of matrix
5492 .   oj - column indices for "off-diagonal" portion of matrix
5493 -   oa - matrix values for "off-diagonal" portion of matrix
5494 
5495    Output Parameter:
5496 .   mat - the matrix
5497 
5498    Level: advanced
5499 
5500    Notes:
5501        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5502        must free the arrays once the matrix has been destroyed and not before.
5503 
5504        The i and j indices are 0 based
5505        The i, j, oi, and oj indices are 0 based
5506        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5507 
5508        This sets local rows and cannot be used to set off-processor values.
5509 
5510        You cannot later use MatSetValues() to change values in this matrix.
5511 
5512 .keywords: matrix, aij, compressed row, sparse, parallel
5513 
5514 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5515           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5516 @*/
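/*
   A small, hypothetical usage sketch (comm, ierr, and the data below are
   illustrative assumptions): one rank of a 2-process run that owns 2 of the 4
   global rows and, as its "diagonal" block, global columns 0-1.  The diagonal
   block stores (0,0)=1.0 and (1,1)=2.0 with column indices local to that
   block; the off-diagonal block stores a single entry at global column 3 in
   local row 0.  As noted above, the arrays are not copied and must outlive
   the matrix.

     Mat         A;
     PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,1},  oj[] = {3};
     PetscScalar oa[] = {0.5};

     ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/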
5517 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5518 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5519 {
5520   PetscErrorCode ierr;
5521   Mat_MPIAIJ     *maij;
5522 
5523   PetscFunctionBegin;
5524   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5525   if (i[0]) {
5526     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5527   }
5528   if (oi[0]) {
5529     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5530   }
5531   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5532   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5533   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5534   maij = (Mat_MPIAIJ*) (*mat)->data;
5535   maij->donotstash     = PETSC_TRUE;
5536   (*mat)->preallocated = PETSC_TRUE;
5537 
5538   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5539   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5540   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5541   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5542 
5543   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5544   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5545 
5546   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5547   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5548   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5549   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5550 
5551   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5552   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5553   PetscFunctionReturn(0);
5554 }
5555 
5556 /*
5557     Special version for direct calls from Fortran
5558 */
5559 #include "private/fortranimpl.h"
5560 
5561 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5562 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5563 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5564 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5565 #endif
5566 
5567 /* Change these macros so they can be used in a void function: the Fortran stub below cannot return an error code, so errors abort instead */
5568 #undef CHKERRQ
5569 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5570 #undef SETERRQ2
5571 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5572 #undef SETERRQ
5573 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5574 
5575 EXTERN_C_BEGIN
5576 #undef __FUNCT__
5577 #define __FUNCT__ "matsetvaluesmpiaij_"
5578 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5579 {
5580   Mat             mat = *mmat;
5581   PetscInt        m = *mm, n = *mn;
5582   InsertMode      addv = *maddv;
5583   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5584   PetscScalar     value;
5585   PetscErrorCode  ierr;
5586 
5587   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5588   if (mat->insertmode == NOT_SET_VALUES) {
5589     mat->insertmode = addv;
5590   }
5591 #if defined(PETSC_USE_DEBUG)
5592   else if (mat->insertmode != addv) {
5593     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5594   }
5595 #endif
5596   {
5597   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5598   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5599   PetscBool       roworiented = aij->roworiented;
5600 
5601   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5602   Mat             A = aij->A;
5603   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5604   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5605   MatScalar       *aa = a->a;
5606   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5607   Mat             B = aij->B;
5608   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5609   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5610   MatScalar       *ba = b->a;
5611 
5612   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5613   PetscInt        nonew = a->nonew;
5614   MatScalar       *ap1,*ap2;
5615 
5616   PetscFunctionBegin;
5617   for (i=0; i<m; i++) {
5618     if (im[i] < 0) continue;
5619 #if defined(PETSC_USE_DEBUG)
5620     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5621 #endif
5622     if (im[i] >= rstart && im[i] < rend) {
5623       row      = im[i] - rstart;
5624       lastcol1 = -1;
5625       rp1      = aj + ai[row];
5626       ap1      = aa + ai[row];
5627       rmax1    = aimax[row];
5628       nrow1    = ailen[row];
5629       low1     = 0;
5630       high1    = nrow1;
5631       lastcol2 = -1;
5632       rp2      = bj + bi[row];
5633       ap2      = ba + bi[row];
5634       rmax2    = bimax[row];
5635       nrow2    = bilen[row];
5636       low2     = 0;
5637       high2    = nrow2;
5638 
5639       for (j=0; j<n; j++) {
5640         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5641         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5642         if (in[j] >= cstart && in[j] < cend){
5643           col = in[j] - cstart;
5644           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5645         } else if (in[j] < 0) continue;
5646 #if defined(PETSC_USE_DEBUG)
5647         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5648 #endif
5649         else {
5650           if (mat->was_assembled) {
5651             if (!aij->colmap) {
5652               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5653             }
5654 #if defined (PETSC_USE_CTABLE)
5655             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5656             col--;
5657 #else
5658             col = aij->colmap[in[j]] - 1;
5659 #endif
5660             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5661               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5662               col =  in[j];
5663               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5664               B = aij->B;
5665               b = (Mat_SeqAIJ*)B->data;
5666               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5667               rp2      = bj + bi[row];
5668               ap2      = ba + bi[row];
5669               rmax2    = bimax[row];
5670               nrow2    = bilen[row];
5671               low2     = 0;
5672               high2    = nrow2;
5673               bm       = aij->B->rmap->n;
5674               ba = b->a;
5675             }
5676           } else col = in[j];
5677           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5678         }
5679       }
5680     } else {
5681       if (!aij->donotstash) {
5682         if (roworiented) {
5683           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5684         } else {
5685           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5686         }
5687       }
5688     }
5689   }}
5690   PetscFunctionReturnVoid();
5691 }
5692 EXTERN_C_END
5693 
5694