xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d155d4104c12796cc6e93a7e2b3cd74e97fcda36)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
7 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
8 {
9   PetscErrorCode  ierr;
10   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
11   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
12   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
13   const PetscInt  *ia,*ib;
14   const MatScalar *aa,*bb;
15   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
16   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
17 
18   PetscFunctionBegin;
19   *keptrows = 0;
20   ia = a->i;
21   ib = b->i;
22   for (i=0; i<m; i++) {
23     na = ia[i+1] - ia[i];
24     nb = ib[i+1] - ib[i];
25     if (!na && !nb) {
26       cnt++;
27       goto ok1;
28     }
29     aa = a->a + ia[i];
30     for (j=0; j<na; j++) {
31       if (aa[j] != 0.0) goto ok1;
32     }
33     bb = b->a + ib[i];
34     for (j=0; j <nb; j++) {
35       if (bb[j] != 0.0) goto ok1;
36     }
37     cnt++;
38     ok1:;
39   }
40   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
41   if (!n0rows) PetscFunctionReturn(0);
42   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
43   cnt = 0;
44   for (i=0; i<m; i++) {
45     na = ia[i+1] - ia[i];
46     nb = ib[i+1] - ib[i];
47     if (!na && !nb) continue;
48     aa = a->a + ia[i];
49     for(j=0; j<na;j++) {
50       if (aa[j] != 0.0) {
51         rows[cnt++] = rstart + i;
52         goto ok2;
53       }
54     }
55     bb = b->a + ib[i];
56     for (j=0; j<nb; j++) {
57       if (bb[j] != 0.0) {
58         rows[cnt++] = rstart + i;
59         goto ok2;
60       }
61     }
62     ok2:;
63   }
64   ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Input:
      comm  - communicator to distribute over
      gmat  - the global SeqAIJ matrix; only significant on rank 0
      m     - number of rows this process is to own
      reuse - MAT_INITIAL_MATRIX creates *inmat; MAT_REUSE_MATRIX only refreshes
              the numerical values of an already-distributed *inmat
    Output:
      inmat - the distributed MPIAIJ matrix

    Rank 0 sends each other rank, in order: its row lengths, its nonzero count,
    its column indices, and its values; each receiver must post the matching
    MPI_Recv calls in exactly that order (same tag throughout).
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 holds the global matrix, so only it can type-check it */
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* build the global row-ownership ranges from everyone's local row count */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];   /* prefix sum: rowners[r]..rowners[r+1] are rank r's rows */
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts; ld[i] counts the
         entries of row i strictly left of the diagonal block (needed later
         by the MAT_REUSE_MATRIX value-copy path) */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 reads its own slice directly out of gmat; no self-send */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts (see ld note above) */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens currently holds total row lengths, so subtract
       the off-diagonal counts to get the diagonal-block lengths */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore total row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      /* only non-root ranks allocated gmataa/gmataj; rank 0 aliases gmat's arrays */
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;   /* stash left-of-diagonal counts for reuse path */
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat   = *inmat;
    ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];   /* rank 0's own slice size */
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each global row arrives as [left-of-diagonal B | A | right-of-diagonal B],
       with ld[] giving the left-of-diagonal count recorded at creation time */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* B entries: rest of row i-1's right part plus row i's left part */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right-of-diagonal part of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      /* gmataa was advanced during the copies; free via the saved base pointer */
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
237 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
a slightly higher hash-table cost; without it, it is not scalable (each process
holds an order-N integer array) but lookup is fast.
*/
245 #undef __FUNCT__
246 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
247 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
248 {
249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
250   PetscErrorCode ierr;
251   PetscInt       n = aij->B->cmap->n,i;
252 
253   PetscFunctionBegin;
254 #if defined (PETSC_USE_CTABLE)
255   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
256   for (i=0; i<n; i++){
257     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
258   }
259 #else
260   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
261   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
262   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
263   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
264 #endif
265   PetscFunctionReturn(0);
266 }
267 
/*
   MatSetValues_SeqAIJ_A_Private(row,col,value,addv) - statement macro that
   inserts (or, when addv == ADD_VALUES, adds) a single value at local
   (row,col) of the diagonal block A.  It is expanded only inside
   MatSetValues_MPIAIJ(), which must have the following caller-scope
   variables set up for the current row: rp1/ap1 (column indices / values of
   the row), nrow1 (current row length), rmax1 (allocated row length),
   low1/high1 (binary-search window, reused across the column loop because
   columns usually arrive sorted), lastcol1, plus aimax, ailen, nonew and
   ignorezeroentries from the Mat_SeqAIJ.  The row is searched by bisection
   down to a 5-element window, then scanned linearly; if the column is absent
   and insertion is permitted, the row is grown (reallocating via
   MatSeqXAIJReallocateAIJ when rmax1 is exceeded) and later entries are
   shifted up one slot to keep the row sorted.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
300 
301 
/*
   MatSetValues_SeqAIJ_B_Private(row,col,value,addv) - identical in structure
   to MatSetValues_SeqAIJ_A_Private() above, but operates on the off-diagonal
   block B using the parallel set of caller-scope variables (rp2/ap2, nrow2,
   rmax2, low2/high2, lastcol2, bimax, bilen).  Only expanded inside
   MatSetValues_MPIAIJ().
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) {		\
      if (rp2[_i] > col) break;			\
      if (rp2[_i] == col) {			      \
	if (addv == ADD_VALUES) ap2[_i] += value;     \
	else                    ap2[_i] = value;      \
	goto b_noinsert;			      \
      }						      \
    }							      \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;					\
    /* shift up all the later entries in this row */			\
    for (ii=N; ii>=_i; ii--) {						\
      rp2[ii+1] = rp2[ii];						\
      ap2[ii+1] = ap2[ii];						\
    }									\
    rp2[_i] = col;							\
    ap2[_i] = value;							\
    b_noinsert: ;								\
    bilen[row] = nrow2;							\
}
334 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   MatSetValuesRow_MPIAIJ - Copies a full row of values, supplied in global
   column order, directly into the storage of the diagonal (A) and
   off-diagonal (B) blocks, bypassing MatSetValues() searches.

   Input:
     A   - the matrix
     row - global row number (must be locally owned)
     v   - the values for every stored entry of the row, ordered by global
           column: [left-of-diagonal B | A | right-of-diagonal B]
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part; diag is the first
     locally owned row, which (square case) is also the first diagonal-block
     column.  B's columns are sorted by global index via garray, so l ends up
     as the count of B entries left of the diagonal block. */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;   /* convert to local row index */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  /* left-of-diagonal entries go into B */
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
362 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - Inserts or adds a logically dense block of values.
   Locally owned rows are routed to the diagonal block A (columns inside the
   local column range) or the off-diagonal block B (all other columns) via
   the MatSetValues_SeqAIJ_{A,B}_Private() macros above, which depend on the
   local variables declared below.  Rows owned by other processes are placed
   in the stash (unless aij->donotstash) for communication at assembly time.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscBool      ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: cache this row's state for the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        /* v may be row- or column-oriented; a NULL v means insert zeros */
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column in the local range: goes into the diagonal block A */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* off-diagonal column: translate global column to B's local column */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
	    col--;   /* colmap stores local+1; 0 means "not present" */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* column not in B's pattern: disassemble so it can be inserted
                 with its global index; B is replaced, so refresh every cached
                 pointer into it */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba = b->a;
            }
          } else col = in[j];   /* before first assembly B uses global column indices */
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash it for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
467 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   MatGetValues_MPIAIJ - Retrieves a block of values into v (row-major,
   v[i*n+j] = entry (idxm[i], idxn[j])).  Only locally owned rows are
   supported; requesting an off-process row is an error.  Columns not stored
   in the local off-diagonal pattern are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column in the local range: read from the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate via colmap (built lazily) */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;   /* colmap stores local+1; 0 means "not present" */
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* garray check guards against stale colmap entries after the
             off-diagonal pattern changed */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
511 
512 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
513 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   MatAssemblyBegin_MPIAIJ - Starts communicating stashed off-process entries.
   Collective unless every process set donotstash/nooffprocentries.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);   /* nothing stashed, nothing to communicate */
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE; the
     bitwise OR over all ranks equals ADD_VALUES|INSERT_VALUES exactly when
     the two modes were mixed */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
538 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash of
   off-process entries into the local blocks, assembles A and B, reaches
   consensus on disassembly, and (on first final assembly) builds the
   scatter machinery for matrix-vector products.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain every incoming stash message; entries arrive grouped but we batch
       consecutive entries of the same row into one MatSetValues call */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;   /* no more messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of the was_assembled flags is 0 iff some rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build colmap/garray and the Mvctx scatter */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row-access workspace is invalidated by assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
609 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
610 {
611   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
612   PetscErrorCode ierr;
613 
614   PetscFunctionBegin;
615   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
616   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatZeroRows_MPIAIJ"
622 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
623 {
624   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
625   PetscErrorCode    ierr;
626   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
627   PetscInt          i,*owners = A->rmap->range;
628   PetscInt          *nprocs,j,idx,nsends,row;
629   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
630   PetscInt          *rvalues,count,base,slen,*source;
631   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
632   MPI_Comm          comm = ((PetscObject)A)->comm;
633   MPI_Request       *send_waits,*recv_waits;
634   MPI_Status        recv_status,*send_status;
635   const PetscScalar *xx;
636   PetscScalar       *bb;
637 #if defined(PETSC_DEBUG)
638   PetscBool      found = PETSC_FALSE;
639 #endif
640 
641   PetscFunctionBegin;
642   /*  first count number of contributors to each processor */
643   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
644   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
645   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
646   j = 0;
647   for (i=0; i<N; i++) {
648     if (lastidx > (idx = rows[i])) j = 0;
649     lastidx = idx;
650     for (; j<size; j++) {
651       if (idx >= owners[j] && idx < owners[j+1]) {
652         nprocs[2*j]++;
653         nprocs[2*j+1] = 1;
654         owner[i] = j;
655 #if defined(PETSC_DEBUG)
656         found = PETSC_TRUE;
657 #endif
658         break;
659       }
660     }
661 #if defined(PETSC_DEBUG)
662     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
663     found = PETSC_FALSE;
664 #endif
665   }
666   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
667 
668   if (A->nooffproczerorows) {
669     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
670     nrecvs = nsends;
671     nmax   = N;
672   } else {
673     /* inform other processors of number of messages and max length*/
674     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
675   }
676 
677   /* post receives:   */
678   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
679   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
680   for (i=0; i<nrecvs; i++) {
681     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
682   }
683 
684   /* do sends:
685       1) starts[i] gives the starting index in svalues for stuff going to
686          the ith processor
687   */
688   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
689   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
690   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
691   starts[0] = 0;
692   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
693   for (i=0; i<N; i++) {
694     svalues[starts[owner[i]]++] = rows[i];
695   }
696 
697   starts[0] = 0;
698   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
699   count = 0;
700   for (i=0; i<size; i++) {
701     if (nprocs[2*i+1]) {
702       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
703     }
704   }
705   ierr = PetscFree(starts);CHKERRQ(ierr);
706 
707   base = owners[rank];
708 
709   /*  wait on receives */
710   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
711   count  = nrecvs; slen = 0;
712   while (count) {
713     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
714     /* unpack receives into our local space */
715     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
716     source[imdex]  = recv_status.MPI_SOURCE;
717     lens[imdex]    = n;
718     slen          += n;
719     count--;
720   }
721   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
722 
723   /* move the data into the send scatter */
724   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
725   count = 0;
726   for (i=0; i<nrecvs; i++) {
727     values = rvalues + i*nmax;
728     for (j=0; j<lens[i]; j++) {
729       lrows[count++] = values[j] - base;
730     }
731   }
732   ierr = PetscFree(rvalues);CHKERRQ(ierr);
733   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
734   ierr = PetscFree(owner);CHKERRQ(ierr);
735   ierr = PetscFree(nprocs);CHKERRQ(ierr);
736 
737   /* fix right hand side if needed */
738   if (x && b) {
739     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
740     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
741     for (i=0; i<slen; i++) {
742       bb[lrows[i]] = diag*xx[lrows[i]];
743     }
744     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
745     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
746   }
747   /*
748         Zero the required rows. If the "diagonal block" of the matrix
749      is square and the user wishes to set the diagonal we use separate
750      code so that MatSetValues() is not called for each diagonal allocating
751      new memory, thus calling lots of mallocs and slowing things down.
752 
753   */
754   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
755   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
756   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
757     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
758   } else if (diag != 0.0) {
759     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
760     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
761       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
762 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
763     }
764     for (i = 0; i < slen; i++) {
765       row  = lrows[i] + rstart;
766       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
767     }
768     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
769     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
770   } else {
771     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
772   }
773   ierr = PetscFree(lrows);CHKERRQ(ierr);
774 
775   /* wait on sends */
776   if (nsends) {
777     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
778     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
779     ierr = PetscFree(send_status);CHKERRQ(ierr);
780   }
781   ierr = PetscFree(send_waits);CHKERRQ(ierr);
782   ierr = PetscFree(svalues);CHKERRQ(ierr);
783 
784   PetscFunctionReturn(0);
785 }
786 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
/*
   MatZeroRowsColumns_MPIAIJ - Zeros the given global rows AND the matching
   columns of a parallel AIJ matrix, putting diag on the diagonal of the
   zeroed rows, and adjusting the right-hand side b for the removed columns
   using the values in x.

   Collective on Mat: every process may list rows owned by any process, so
   the requested indices are first routed to their owners with a
   rendezvous-style MPI exchange, then each process zeroes its own part.

   NOTE(review): unlike MatZeroRows_MPIAIJ above, x and b are used
   unconditionally here (scattered / dereferenced below) — confirm all
   callers pass non-null x and b.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = #indices destined for rank j, nprocs[2*j+1] = 1 if any */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* j is retained across iterations: the owner search restarts from 0
       only when the index sequence goes backwards (fast for sorted rows[]) */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* the pack loop above advanced starts[]; rebuild the offsets for sending */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert to local row index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over the ghost (off-process) columns: a ghost column is
     masked iff its owning row was zeroed somewhere */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  /* gather the x values for the ghost columns so b can be corrected below */
  ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecGetArray(b,&bb);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  /* zero the full CSR row slice of B for every zeroed local row */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use){
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;   /* maps compressed row -> actual local row */
    for (i=0; i<m; i++){
      n   = ii[i+1] - ii[i];
      aj  = aij->j + ii[i];
      aa  = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is being removed: move its contribution to the rhs, then zero */
          bb[*ridx] -= *aa*xx[*aj];
          *aa        = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
   for (i=0; i<m; i++) {
     n   = ii[i+1] - ii[i];
     aj  = aij->j + ii[i];
     aa  = aij->a + ii[i];
     for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }

  ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMult_MPIAIJ"
986 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
987 {
988   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode ierr;
990   PetscInt       nt;
991 
992   PetscFunctionBegin;
993   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
994   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
995   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
997   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1004 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1016 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1017 {
1018   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1019   PetscErrorCode ierr;
1020 
1021   PetscFunctionBegin;
1022   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1024   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1031 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1032 {
1033   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1034   PetscErrorCode ierr;
1035   PetscBool      merged;
1036 
1037   PetscFunctionBegin;
1038   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1039   /* do nondiagonal part */
1040   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1041   if (!merged) {
1042     /* send it on its way */
1043     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1044     /* do local part */
1045     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1046     /* receive remote parts: note this assumes the values are not actually */
1047     /* added in yy until the next line, */
1048     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1049   } else {
1050     /* do local part */
1051     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1052     /* send it on its way */
1053     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054     /* values actually were received in the Begin() but we need to call this nop */
1055     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056   }
1057   PetscFunctionReturn(0);
1058 }
1059 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat to
   within tol.  First compares the (cheap) local diagonal blocks; only if
   those match does it extract and compare the off-diagonal blocks, which
   requires a parallel submatrix extraction.
*/
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* uniprocessor: the diagonal block is the whole matrix, so we are done */
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside this process's ownership range
     NOTE(review): the array is sized with N but the second fill loop runs to
     M — these disagree when M != N; presumably only square matrices reach
     here, confirm before relying on rectangular input. */
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* A(Me,Notme) must equal B(Notme,Me)^T for the matrices to be transposes */
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
   PetscFunctionReturn(0);
}
EXTERN_C_END
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1107 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   /* do nondiagonal part */
1114   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1115   /* send it on its way */
1116   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1117   /* do local part */
1118   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1119   /* receive remote parts */
1120   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 /*
1125   This only works correctly for square matrices where the subblock A->A is the
1126    diagonal block
1127 */
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1130 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1131 {
1132   PetscErrorCode ierr;
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134 
1135   PetscFunctionBegin;
1136   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1137   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1138   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatScale_MPIAIJ"
1144 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1145 {
1146   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1147   PetscErrorCode ierr;
1148 
1149   PetscFunctionBegin;
1150   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1151   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   MatDestroy_MPIAIJ - Releases everything owned by a parallel AIJ matrix:
   the stash, both sequential blocks, the column map, the ghost vector and
   scatter, row-access work arrays, and the composed dynamic functions.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* colmap is either a PetscTable or a plain array, depending on build */
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  /* detach the type name and all dynamically composed methods so the object
     can safely be re-typed or finalized */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1196 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1197 {
1198   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1199   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1200   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1201   PetscErrorCode    ierr;
1202   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1203   int               fd;
1204   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1205   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1206   PetscScalar       *column_values;
1207   PetscInt          message_count,flowcontrolcount;
1208 
1209   PetscFunctionBegin;
1210   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1211   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1212   nz   = A->nz + B->nz;
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1218     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) {
1224       rlen = PetscMax(rlen,range[i+1] - range[i]);
1225     }
1226   } else {
1227     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1228     rlen = mat->rmap->n;
1229   }
1230 
1231   /* load up the local row counts */
1232   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1233   for (i=0; i<mat->rmap->n; i++) {
1234     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1235   }
1236 
1237   /* store the row lengths to the file */
1238   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1239   if (!rank) {
1240     MPI_Status status;
1241     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     for (i=1; i<size; i++) {
1243       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1244       rlen = range[i+1] - range[i];
1245       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1246       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1247     }
1248     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1249   } else {
1250     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1251     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1255 
1256   /* load up the local column indices */
1257   nzmax = nz; /* )th processor needs space a largest processor needs */
1258   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1259   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1260   cnt  = 0;
1261   for (i=0; i<mat->rmap->n; i++) {
1262     for (j=B->i[i]; j<B->i[i+1]; j++) {
1263       if ( (col = garray[B->j[j]]) > cstart) break;
1264       column_indices[cnt++] = col;
1265     }
1266     for (k=A->i[i]; k<A->i[i+1]; k++) {
1267       column_indices[cnt++] = A->j[k] + cstart;
1268     }
1269     for (; j<B->i[i+1]; j++) {
1270       column_indices[cnt++] = garray[B->j[j]];
1271     }
1272   }
1273   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1274 
1275   /* store the column indices to the file */
1276    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1277   if (!rank) {
1278     MPI_Status status;
1279     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1280     for (i=1; i<size; i++) {
1281       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1282       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1283       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1284       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1290     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1291     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1292     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1293   }
1294   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1295 
1296   /* load up the local column values */
1297   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1298   cnt  = 0;
1299   for (i=0; i<mat->rmap->n; i++) {
1300     for (j=B->i[i]; j<B->i[i+1]; j++) {
1301       if ( garray[B->j[j]] > cstart) break;
1302       column_values[cnt++] = B->a[j];
1303     }
1304     for (k=A->i[i]; k<A->i[i+1]; k++) {
1305       column_values[cnt++] = A->a[k];
1306     }
1307     for (; j<B->i[i+1]; j++) {
1308       column_values[cnt++] = B->a[j];
1309     }
1310   }
1311   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1312 
1313   /* store the column values to the file */
1314    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1315   if (!rank) {
1316     MPI_Status status;
1317     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1318     for (i=1; i<size; i++) {
1319        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1320       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1321       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1322       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1323       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1324     }
1325     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1326   } else {
1327     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1328     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1329     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1331   }
1332   ierr = PetscFree(column_values);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Viewing for ASCII, draw, binary, and
   socket viewers.  Info-format ASCII output and binary output are handled
   directly; for everything else the whole matrix is gathered onto process 0
   and viewed there through a singleton viewer.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process statistics: local sizes, nonzeros, and I-node usage */
      MatInfo    info;
      PetscBool  inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): a PetscBool* is cast to PetscInt** here — the returned
         pointer is only used as a yes/no flag; confirm sizes are compatible */
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt   inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factored-matrix info on this type */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscBool  isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar   *a;

    if (mat->rmap->N > 1024) {
      PetscBool  flg = PETSC_FALSE;

      ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    /* rank 0 owns all rows of the gathered copy; other ranks own none */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    /* temporarily shift aj to global column numbering in place ... */
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    /* ... and shift it back so the original matrix is unmodified */
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;   /* ct keeps the base pointer; cols is advanced below */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
      /* NOTE(review): return value of PetscStrcpy is not checked here */
      PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatView_MPIAIJ"
1479 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1480 {
1481   PetscErrorCode ierr;
1482   PetscBool      iascii,isdraw,issocket,isbinary;
1483 
1484   PetscFunctionBegin;
1485   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1486   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1487   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1488   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1489   if (iascii || isdraw || isbinary || issocket) {
1490     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1491   } else {
1492     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1493   }
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatSOR_MPIAIJ"
1499 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1500 {
1501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1502   PetscErrorCode ierr;
1503   Vec            bb1 = 0;
1504   PetscBool      hasop;
1505 
1506   PetscFunctionBegin;
1507   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1508     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1509   }
1510 
1511   if (flag == SOR_APPLY_UPPER) {
1512     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1513     PetscFunctionReturn(0);
1514   }
1515 
1516   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1517     if (flag & SOR_ZERO_INITIAL_GUESS) {
1518       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1519       its--;
1520     }
1521 
1522     while (its--) {
1523       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1525 
1526       /* update rhs: bb1 = bb - B*x */
1527       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1528       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1529 
1530       /* local sweep */
1531       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1532     }
1533   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1534     if (flag & SOR_ZERO_INITIAL_GUESS) {
1535       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1536       its--;
1537     }
1538     while (its--) {
1539       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1540       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1541 
1542       /* update rhs: bb1 = bb - B*x */
1543       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1544       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1545 
1546       /* local sweep */
1547       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1548     }
1549   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1550     if (flag & SOR_ZERO_INITIAL_GUESS) {
1551       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552       its--;
1553     }
1554     while (its--) {
1555       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557 
1558       /* update rhs: bb1 = bb - B*x */
1559       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1560       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1561 
1562       /* local sweep */
1563       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1564     }
1565   }  else if (flag & SOR_EISENSTAT) {
1566     Vec         xx1;
1567 
1568     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1569     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1570 
1571     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573     if (!mat->diag) {
1574       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1575       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1576     }
1577     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1578     if (hasop) {
1579       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1580     } else {
1581       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1582     }
1583     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1584 
1585     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1586 
1587     /* local sweep */
1588     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1589     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1590     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1591   } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1592 
1593   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatPermute_MPIAIJ"
1599 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1600 {
1601   MPI_Comm       comm,pcomm;
1602   PetscInt       first,local_size,nrows;
1603   const PetscInt *rows;
1604   PetscMPIInt    size;
1605   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1606   PetscErrorCode ierr;
1607 
1608   PetscFunctionBegin;
1609   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1610   /* make a collective version of 'rowp' */
1611   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1612   if (pcomm==comm) {
1613     crowp = rowp;
1614   } else {
1615     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1616     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1617     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1618     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1619   }
1620   /* collect the global row permutation and invert it */
1621   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1622   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1623   if (pcomm!=comm) {
1624     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1625   }
1626   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1627   /* get the local target indices */
1628   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1629   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1630   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1631   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1632   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1633   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1634   /* the column permutation is so much easier;
1635      make a local version of 'colp' and invert it */
1636   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1637   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1638   if (size==1) {
1639     lcolp = colp;
1640   } else {
1641     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1642     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1643     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1644   }
1645   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1646   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1647   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1648   if (size>1) {
1649     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1650     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1651   }
1652   /* now we just get the submatrix */
1653   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1654   /* clean up */
1655   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1656   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1662 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1663 {
1664   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1665   Mat            A = mat->A,B = mat->B;
1666   PetscErrorCode ierr;
1667   PetscReal      isend[5],irecv[5];
1668 
1669   PetscFunctionBegin;
1670   info->block_size     = 1.0;
1671   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1672   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1673   isend[3] = info->memory;  isend[4] = info->mallocs;
1674   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1675   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1676   isend[3] += info->memory;  isend[4] += info->mallocs;
1677   if (flag == MAT_LOCAL) {
1678     info->nz_used      = isend[0];
1679     info->nz_allocated = isend[1];
1680     info->nz_unneeded  = isend[2];
1681     info->memory       = isend[3];
1682     info->mallocs      = isend[4];
1683   } else if (flag == MAT_GLOBAL_MAX) {
1684     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1685     info->nz_used      = irecv[0];
1686     info->nz_allocated = irecv[1];
1687     info->nz_unneeded  = irecv[2];
1688     info->memory       = irecv[3];
1689     info->mallocs      = irecv[4];
1690   } else if (flag == MAT_GLOBAL_SUM) {
1691     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1692     info->nz_used      = irecv[0];
1693     info->nz_allocated = irecv[1];
1694     info->nz_unneeded  = irecv[2];
1695     info->memory       = irecv[3];
1696     info->mallocs      = irecv[4];
1697   }
1698   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1699   info->fill_ratio_needed = 0;
1700   info->factor_mallocs    = 0;
1701 
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatSetOption_MPIAIJ"
1707 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1708 {
1709   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1710   PetscErrorCode ierr;
1711 
1712   PetscFunctionBegin;
1713   switch (op) {
1714   case MAT_NEW_NONZERO_LOCATIONS:
1715   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1716   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1717   case MAT_KEEP_NONZERO_PATTERN:
1718   case MAT_NEW_NONZERO_LOCATION_ERR:
1719   case MAT_USE_INODES:
1720   case MAT_IGNORE_ZERO_ENTRIES:
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_ROW_ORIENTED:
1725     a->roworiented = flg;
1726     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1727     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_NEW_DIAGONALS:
1730     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1731     break;
1732   case MAT_IGNORE_OFF_PROC_ENTRIES:
1733     a->donotstash = PETSC_TRUE;
1734     break;
1735   case MAT_SPD:
1736     A->spd_set                         = PETSC_TRUE;
1737     A->spd                             = flg;
1738     if (flg) {
1739       A->symmetric                     = PETSC_TRUE;
1740       A->structurally_symmetric        = PETSC_TRUE;
1741       A->symmetric_set                 = PETSC_TRUE;
1742       A->structurally_symmetric_set    = PETSC_TRUE;
1743     }
1744     break;
1745   case MAT_SYMMETRIC:
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   case MAT_STRUCTURALLY_SYMMETRIC:
1749     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_HERMITIAN:
1752     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1753     break;
1754   case MAT_SYMMETRY_ETERNAL:
1755     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1756     break;
1757   default:
1758     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix,
   with the entries of the diagonal block (A) and off-diagonal block (B)
   merged into a single list sorted by global column number.

   Only rows in [rstart,rend) owned by this process may be requested.
   Merged indices/values are written into per-matrix work arrays
   (mat->rowindices/mat->rowvalues), sized once for the longest local row
   and reused; hence only one row may be "active" at a time, enforced via
   mat->getrowactive and released by MatRestoreRow_MPIAIJ().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      /* row length is the sum of the A-block and B-block row lengths */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request from the sequential blocks only the pieces (values and/or
     indices) that the caller asked for; a NULL pointer suppresses a piece */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;  /* maps compressed local columns of B to global columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = count of leading B entries whose global column precedes the
         diagonal block (i.e. < cstart); layout is then B-left, A, B-right */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested, so compute imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A-block columns are local: shift by cstart to make them global */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* hand the borrowed row storage back to the sequential blocks */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1845 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1846 {
1847   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1848 
1849   PetscFunctionBegin;
1850   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1851   aij->getrowactive = PETSC_FALSE;
1852   PetscFunctionReturn(0);
1853 }
1854 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, one, or infinity norm of a
   parallel AIJ matrix by walking the stored nonzeros of the diagonal (A)
   and off-diagonal (B) blocks and reducing across the communicator.
   NORM_2 is not supported.  On a single process the work is delegated to
   the sequential MatNorm().
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, then a global SUM and a sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      /* NOTE(review): plain sqrt() computes in double; PetscSqrtReal would
         match PetscReal in single-precision builds -- confirm availability */
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate per-global-column absolute sums, reduce, take the max */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      /* A-block columns are local; shift by cstart to index globally */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      /* B-block columns are compressed; garray maps them to global columns */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly local: take the max of local row sums, then MAX-reduce */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1931 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the explicit transpose of a parallel AIJ
   matrix.

   For MAT_INITIAL_MATRIX (or in-place reuse) a new matrix is created whose
   d_nnz preallocation is computed from the column counts of the local
   diagonal block; o_nnz is approximated by the same array to avoid
   communication.  In-place transposition (reuse with *matout == A) is
   allowed only for square matrices and is finished with MatHeaderMerge().

   Entries are transposed by calling MatSetValues() with the row/column
   argument roles swapped (ncol "rows", 1 "column").  Note that Aloc->j is
   temporarily shifted to global indices and restored afterwards.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      /* each nonzero in column j of A becomes a nonzero in row j of B */
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    /* insert row i of A as column (rstart+i) of B; aj holds global indices here */
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  /* expand the compressed B columns to global indices via garray */
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place: fold the assembled B back into A's header */
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2005 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2006 {
2007   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2008   Mat            a = aij->A,b = aij->B;
2009   PetscErrorCode ierr;
2010   PetscInt       s1,s2,s3;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2014   if (rr) {
2015     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2016     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2017     /* Overlap communication with computation. */
2018     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2019   }
2020   if (ll) {
2021     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2022     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2023     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2024   }
2025   /* scale  the diagonal block */
2026   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2027 
2028   if (rr) {
2029     /* Do a scatter end and then right scale the off-diagonal block */
2030     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2031     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2032   }
2033 
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2039 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2040 {
2041   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2046   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2047   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2048   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2053 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2054 {
2055   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2056   PetscErrorCode ierr;
2057 
2058   PetscFunctionBegin;
2059   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatEqual_MPIAIJ"
2065 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2066 {
2067   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2068   Mat            a,b,c,d;
2069   PetscBool      flg;
2070   PetscErrorCode ierr;
2071 
2072   PetscFunctionBegin;
2073   a = matA->A; b = matA->B;
2074   c = matB->A; d = matB->B;
2075 
2076   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2077   if (flg) {
2078     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2079   }
2080   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatCopy_MPIAIJ"
2086 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2087 {
2088   PetscErrorCode ierr;
2089   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2090   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2091 
2092   PetscFunctionBegin;
2093   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2094   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2095     /* because of the column compression in the off-processor part of the matrix a->B,
2096        the number of columns in a->B and b->B may be different, hence we cannot call
2097        the MatCopy() directly on the two parts. If need be, we can provide a more
2098        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2099        then copying the submatrices */
2100     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2101   } else {
2102     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2103     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2104   }
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2110 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2111 {
2112   PetscErrorCode ierr;
2113 
2114   PetscFunctionBegin;
2115   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatAXPY_MPIAIJ"
2121 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2122 {
2123   PetscErrorCode ierr;
2124   PetscInt       i;
2125   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2126   PetscBLASInt   bnz,one=1;
2127   Mat_SeqAIJ     *x,*y;
2128 
2129   PetscFunctionBegin;
2130   if (str == SAME_NONZERO_PATTERN) {
2131     PetscScalar alpha = a;
2132     x = (Mat_SeqAIJ *)xx->A->data;
2133     y = (Mat_SeqAIJ *)yy->A->data;
2134     bnz = PetscBLASIntCast(x->nz);
2135     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2136     x = (Mat_SeqAIJ *)xx->B->data;
2137     y = (Mat_SeqAIJ *)yy->B->data;
2138     bnz = PetscBLASIntCast(x->nz);
2139     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2140   } else if (str == SUBSET_NONZERO_PATTERN) {
2141     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2142 
2143     x = (Mat_SeqAIJ *)xx->B->data;
2144     y = (Mat_SeqAIJ *)yy->B->data;
2145     if (y->xtoy && y->XtoY != xx->B) {
2146       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2147       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2148     }
2149     if (!y->xtoy) { /* get xtoy */
2150       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2151       y->XtoY = xx->B;
2152       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2153     }
2154     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2155   } else {
2156     Mat B;
2157     PetscInt *nnz_d,*nnz_o;
2158     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2159     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2160     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2161     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2162     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2163     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2164     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2165     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2166     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2167     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2168     ierr = MatHeaderReplace(Y,B);
2169     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2170     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatConjugate_MPIAIJ"
2179 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2180 {
2181 #if defined(PETSC_USE_COMPLEX)
2182   PetscErrorCode ierr;
2183   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2184 
2185   PetscFunctionBegin;
2186   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2187   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2188 #else
2189   PetscFunctionBegin;
2190 #endif
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatRealPart_MPIAIJ"
2196 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2197 {
2198   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2203   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2209 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2210 {
2211   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2216   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 #ifdef PETSC_HAVE_PBGL
2221 
2222 #include <boost/parallel/mpi/bsp_process_group.hpp>
2223 #include <boost/graph/distributed/ilu_default_graph.hpp>
2224 #include <boost/graph/distributed/ilu_0_block.hpp>
2225 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2226 #include <boost/graph/distributed/petsc/interface.hpp>
2227 #include <boost/multi_array.hpp>
2228 #include <boost/parallel/distributed_property_map->hpp>
2229 
2230 #undef __FUNCT__
2231 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2232 /*
2233   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2234 */
2235 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2236 {
2237   namespace petsc = boost::distributed::petsc;
2238 
2239   namespace graph_dist = boost::graph::distributed;
2240   using boost::graph::distributed::ilu_default::process_group_type;
2241   using boost::graph::ilu_permuted;
2242 
2243   PetscBool       row_identity, col_identity;
2244   PetscContainer  c;
2245   PetscInt        m, n, M, N;
2246   PetscErrorCode  ierr;
2247 
2248   PetscFunctionBegin;
2249   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2250   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2251   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2252   if (!row_identity || !col_identity) {
2253     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2254   }
2255 
2256   process_group_type pg;
2257   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2258   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2259   lgraph_type&   level_graph = *lgraph_p;
2260   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2261 
2262   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2263   ilu_permuted(level_graph);
2264 
2265   /* put together the new matrix */
2266   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2267   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2268   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2269   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2270   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2271   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2272   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273 
2274   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2275   ierr = PetscContainerSetPointer(c, lgraph_p);
2276   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2277   PetscFunctionReturn(0);
2278 }
2279 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
   MatLUFactorNumeric_MPIAIJ - Intentionally empty numeric stage: with PBGL the
   entire factorization (ilu_permuted) is carried out during
   MatILUFactorSymbolic_MPIAIJ() above, so no numeric work remains here.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatSolve_MPIAIJ"
2290 /*
2291   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2292 */
2293 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2294 {
2295   namespace graph_dist = boost::graph::distributed;
2296 
2297   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2298   lgraph_type*   lgraph_p;
2299   PetscContainer c;
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2304   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2305   ierr = VecCopy(b, x);CHKERRQ(ierr);
2306 
2307   PetscScalar* array_x;
2308   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2309   PetscInt sx;
2310   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2311 
2312   PetscScalar* array_b;
2313   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2314   PetscInt sb;
2315   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2316 
2317   lgraph_type&   level_graph = *lgraph_p;
2318   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2319 
2320   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2321   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2322                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2323 
2324   typedef boost::iterator_property_map<array_ref_type::iterator,
2325                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2326   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2327                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2328 
2329   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2330 
2331   PetscFunctionReturn(0);
2332 }
2333 #endif
2334 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;      /* local nonzero count; number of messages sent/received */
  PetscMPIInt    *send_rank,*recv_rank;      /* ranks this process sends to / receives from */
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; /* nonzero-count and row-pointer/column-index send/recv buffers */
  PetscScalar    *sbuf_a,**rbuf_a;           /* numerical-value send buffer and per-sender receive buffers */
  PetscErrorCode (*MatDestroy)(Mat);         /* original destroy routine, restored by MatDestroy_MatRedundant() */
} Mat_Redundant;
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2345 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2346 {
2347   PetscErrorCode       ierr;
2348   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2349   PetscInt             i;
2350 
2351   PetscFunctionBegin;
2352   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2353   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2354   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2355   for (i=0; i<redund->nrecvs; i++){
2356     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2357     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2358   }
2359   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2360   ierr = PetscFree(redund);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatDestroy_MatRedundant"
2366 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2367 {
2368   PetscErrorCode  ierr;
2369   PetscContainer  container;
2370   Mat_Redundant   *redund=PETSC_NULL;
2371 
2372   PetscFunctionBegin;
2373   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2374   if (container) {
2375     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2376   } else {
2377     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2378   }
2379   A->ops->destroy = redund->MatDestroy;
2380   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2381   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2382   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2388 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2389 {
2390   PetscMPIInt    rank,size;
2391   MPI_Comm       comm=((PetscObject)mat)->comm;
2392   PetscErrorCode ierr;
2393   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2394   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2395   PetscInt       *rowrange=mat->rmap->range;
2396   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2397   Mat            A=aij->A,B=aij->B,C=*matredundant;
2398   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2399   PetscScalar    *sbuf_a;
2400   PetscInt       nzlocal=a->nz+b->nz;
2401   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2402   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2403   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2404   MatScalar      *aworkA,*aworkB;
2405   PetscScalar    *vals;
2406   PetscMPIInt    tag1,tag2,tag3,imdex;
2407   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2408                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2409   MPI_Status     recv_status,*send_status;
2410   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2411   PetscInt       **rbuf_j=PETSC_NULL;
2412   PetscScalar    **rbuf_a=PETSC_NULL;
2413   Mat_Redundant  *redund=PETSC_NULL;
2414   PetscContainer container;
2415 
2416   PetscFunctionBegin;
2417   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2418   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2419 
2420   if (reuse == MAT_REUSE_MATRIX) {
2421     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2422     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2423     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2424     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2425     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2426     if (container) {
2427       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2428     } else {
2429       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2430     }
2431     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2432 
2433     nsends    = redund->nsends;
2434     nrecvs    = redund->nrecvs;
2435     send_rank = redund->send_rank;
2436     recv_rank = redund->recv_rank;
2437     sbuf_nz   = redund->sbuf_nz;
2438     rbuf_nz   = redund->rbuf_nz;
2439     sbuf_j    = redund->sbuf_j;
2440     sbuf_a    = redund->sbuf_a;
2441     rbuf_j    = redund->rbuf_j;
2442     rbuf_a    = redund->rbuf_a;
2443   }
2444 
2445   if (reuse == MAT_INITIAL_MATRIX){
2446     PetscMPIInt  subrank,subsize;
2447     PetscInt     nleftover,np_subcomm;
2448     /* get the destination processors' id send_rank, nsends and nrecvs */
2449     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2450     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2451     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2452     np_subcomm = size/nsubcomm;
2453     nleftover  = size - nsubcomm*np_subcomm;
2454     nsends = 0; nrecvs = 0;
2455     for (i=0; i<size; i++){ /* i=rank*/
2456       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2457         send_rank[nsends] = i; nsends++;
2458         recv_rank[nrecvs++] = i;
2459       }
2460     }
2461     if (rank >= size - nleftover){/* this proc is a leftover processor */
2462       i = size-nleftover-1;
2463       j = 0;
2464       while (j < nsubcomm - nleftover){
2465         send_rank[nsends++] = i;
2466         i--; j++;
2467       }
2468     }
2469 
2470     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2471       for (i=0; i<nleftover; i++){
2472         recv_rank[nrecvs++] = size-nleftover+i;
2473       }
2474     }
2475 
2476     /* allocate sbuf_j, sbuf_a */
2477     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2478     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2479     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2480   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2481 
2482   /* copy mat's local entries into the buffers */
2483   if (reuse == MAT_INITIAL_MATRIX){
2484     rownz_max = 0;
2485     rptr = sbuf_j;
2486     cols = sbuf_j + rend-rstart + 1;
2487     vals = sbuf_a;
2488     rptr[0] = 0;
2489     for (i=0; i<rend-rstart; i++){
2490       row = i + rstart;
2491       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2492       ncols  = nzA + nzB;
2493       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2494       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2495       /* load the column indices for this row into cols */
2496       lwrite = 0;
2497       for (l=0; l<nzB; l++) {
2498         if ((ctmp = bmap[cworkB[l]]) < cstart){
2499           vals[lwrite]   = aworkB[l];
2500           cols[lwrite++] = ctmp;
2501         }
2502       }
2503       for (l=0; l<nzA; l++){
2504         vals[lwrite]   = aworkA[l];
2505         cols[lwrite++] = cstart + cworkA[l];
2506       }
2507       for (l=0; l<nzB; l++) {
2508         if ((ctmp = bmap[cworkB[l]]) >= cend){
2509           vals[lwrite]   = aworkB[l];
2510           cols[lwrite++] = ctmp;
2511         }
2512       }
2513       vals += ncols;
2514       cols += ncols;
2515       rptr[i+1] = rptr[i] + ncols;
2516       if (rownz_max < ncols) rownz_max = ncols;
2517     }
2518     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2519   } else { /* only copy matrix values into sbuf_a */
2520     rptr = sbuf_j;
2521     vals = sbuf_a;
2522     rptr[0] = 0;
2523     for (i=0; i<rend-rstart; i++){
2524       row = i + rstart;
2525       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2526       ncols  = nzA + nzB;
2527       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2528       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2529       lwrite = 0;
2530       for (l=0; l<nzB; l++) {
2531         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2532       }
2533       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2534       for (l=0; l<nzB; l++) {
2535         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2536       }
2537       vals += ncols;
2538       rptr[i+1] = rptr[i] + ncols;
2539     }
2540   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2541 
2542   /* send nzlocal to others, and recv other's nzlocal */
2543   /*--------------------------------------------------*/
2544   if (reuse == MAT_INITIAL_MATRIX){
2545     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2546     s_waits2 = s_waits3 + nsends;
2547     s_waits1 = s_waits2 + nsends;
2548     r_waits1 = s_waits1 + nsends;
2549     r_waits2 = r_waits1 + nrecvs;
2550     r_waits3 = r_waits2 + nrecvs;
2551   } else {
2552     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2553     r_waits3 = s_waits3 + nsends;
2554   }
2555 
2556   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2557   if (reuse == MAT_INITIAL_MATRIX){
2558     /* get new tags to keep the communication clean */
2559     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2560     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2561     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2562 
2563     /* post receives of other's nzlocal */
2564     for (i=0; i<nrecvs; i++){
2565       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2566     }
2567     /* send nzlocal to others */
2568     for (i=0; i<nsends; i++){
2569       sbuf_nz[i] = nzlocal;
2570       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2571     }
2572     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2573     count = nrecvs;
2574     while (count) {
2575       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2576       recv_rank[imdex] = recv_status.MPI_SOURCE;
2577       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2578       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2579 
2580       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2581       rbuf_nz[imdex] += i + 2;
2582       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2583       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2584       count--;
2585     }
2586     /* wait on sends of nzlocal */
2587     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2588     /* send mat->i,j to others, and recv from other's */
2589     /*------------------------------------------------*/
2590     for (i=0; i<nsends; i++){
2591       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2592       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2593     }
2594     /* wait on receives of mat->i,j */
2595     /*------------------------------*/
2596     count = nrecvs;
2597     while (count) {
2598       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2599       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2600       count--;
2601     }
2602     /* wait on sends of mat->i,j */
2603     /*---------------------------*/
2604     if (nsends) {
2605       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2606     }
2607   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2608 
2609   /* post receives, send and receive mat->a */
2610   /*----------------------------------------*/
2611   for (imdex=0; imdex<nrecvs; imdex++) {
2612     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2613   }
2614   for (i=0; i<nsends; i++){
2615     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2616   }
2617   count = nrecvs;
2618   while (count) {
2619     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2620     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2621     count--;
2622   }
2623   if (nsends) {
2624     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2625   }
2626 
2627   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2628 
2629   /* create redundant matrix */
2630   /*-------------------------*/
2631   if (reuse == MAT_INITIAL_MATRIX){
2632     /* compute rownz_max for preallocation */
2633     for (imdex=0; imdex<nrecvs; imdex++){
2634       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2635       rptr = rbuf_j[imdex];
2636       for (i=0; i<j; i++){
2637         ncols = rptr[i+1] - rptr[i];
2638         if (rownz_max < ncols) rownz_max = ncols;
2639       }
2640     }
2641 
2642     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2643     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2644     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2645     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2646     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2647   } else {
2648     C = *matredundant;
2649   }
2650 
2651   /* insert local matrix entries */
2652   rptr = sbuf_j;
2653   cols = sbuf_j + rend-rstart + 1;
2654   vals = sbuf_a;
2655   for (i=0; i<rend-rstart; i++){
2656     row   = i + rstart;
2657     ncols = rptr[i+1] - rptr[i];
2658     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2659     vals += ncols;
2660     cols += ncols;
2661   }
2662   /* insert received matrix entries */
2663   for (imdex=0; imdex<nrecvs; imdex++){
2664     rstart = rowrange[recv_rank[imdex]];
2665     rend   = rowrange[recv_rank[imdex]+1];
2666     rptr = rbuf_j[imdex];
2667     cols = rbuf_j[imdex] + rend-rstart + 1;
2668     vals = rbuf_a[imdex];
2669     for (i=0; i<rend-rstart; i++){
2670       row   = i + rstart;
2671       ncols = rptr[i+1] - rptr[i];
2672       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2673       vals += ncols;
2674       cols += ncols;
2675     }
2676   }
2677   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2678   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2679   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2680   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2681   if (reuse == MAT_INITIAL_MATRIX){
2682     PetscContainer container;
2683     *matredundant = C;
2684     /* create a supporting struct and attach it to C for reuse */
2685     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2686     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2687     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2688     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2689     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2690 
2691     redund->nzlocal = nzlocal;
2692     redund->nsends  = nsends;
2693     redund->nrecvs  = nrecvs;
2694     redund->send_rank = send_rank;
2695     redund->recv_rank = recv_rank;
2696     redund->sbuf_nz = sbuf_nz;
2697     redund->rbuf_nz = rbuf_nz;
2698     redund->sbuf_j  = sbuf_j;
2699     redund->sbuf_a  = sbuf_a;
2700     redund->rbuf_j  = rbuf_j;
2701     redund->rbuf_a  = rbuf_a;
2702 
2703     redund->MatDestroy = C->ops->destroy;
2704     C->ops->destroy    = MatDestroy_MatRedundant;
2705   }
2706   PetscFunctionReturn(0);
2707 }
2708 
2709 #undef __FUNCT__
2710 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2711 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2712 {
2713   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2714   PetscErrorCode ierr;
2715   PetscInt       i,*idxb = 0;
2716   PetscScalar    *va,*vb;
2717   Vec            vtmp;
2718 
2719   PetscFunctionBegin;
2720   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2721   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2722   if (idx) {
2723     for (i=0; i<A->rmap->n; i++) {
2724       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2725     }
2726   }
2727 
2728   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2729   if (idx) {
2730     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2731   }
2732   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2733   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2734 
2735   for (i=0; i<A->rmap->n; i++){
2736     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2737       va[i] = vb[i];
2738       if (idx) idx[i] = a->garray[idxb[i]];
2739     }
2740   }
2741 
2742   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2743   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2744   ierr = PetscFree(idxb);CHKERRQ(ierr);
2745   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2751 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2752 {
2753   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2754   PetscErrorCode ierr;
2755   PetscInt       i,*idxb = 0;
2756   PetscScalar    *va,*vb;
2757   Vec            vtmp;
2758 
2759   PetscFunctionBegin;
2760   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2761   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2762   if (idx) {
2763     for (i=0; i<A->cmap->n; i++) {
2764       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2765     }
2766   }
2767 
2768   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2769   if (idx) {
2770     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2771   }
2772   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2773   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2774 
2775   for (i=0; i<A->rmap->n; i++){
2776     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2777       va[i] = vb[i];
2778       if (idx) idx[i] = a->garray[idxb[i]];
2779     }
2780   }
2781 
2782   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2783   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2784   ierr = PetscFree(idxb);CHKERRQ(ierr);
2785   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2786   PetscFunctionReturn(0);
2787 }
2788 
2789 #undef __FUNCT__
2790 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2791 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2792 {
2793   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2794   PetscInt       n      = A->rmap->n;
2795   PetscInt       cstart = A->cmap->rstart;
2796   PetscInt      *cmap   = mat->garray;
2797   PetscInt      *diagIdx, *offdiagIdx;
2798   Vec            diagV, offdiagV;
2799   PetscScalar   *a, *diagA, *offdiagA;
2800   PetscInt       r;
2801   PetscErrorCode ierr;
2802 
2803   PetscFunctionBegin;
2804   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2805   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2806   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2807   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2808   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2809   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2810   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2811   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2812   for(r = 0; r < n; ++r) {
2813     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2814       a[r]   = diagA[r];
2815       idx[r] = cstart + diagIdx[r];
2816     } else {
2817       a[r]   = offdiagA[r];
2818       idx[r] = cmap[offdiagIdx[r]];
2819     }
2820   }
2821   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2822   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2823   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2824   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2825   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2826   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 #undef __FUNCT__
2831 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2832 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2833 {
2834   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2835   PetscInt       n      = A->rmap->n;
2836   PetscInt       cstart = A->cmap->rstart;
2837   PetscInt      *cmap   = mat->garray;
2838   PetscInt      *diagIdx, *offdiagIdx;
2839   Vec            diagV, offdiagV;
2840   PetscScalar   *a, *diagA, *offdiagA;
2841   PetscInt       r;
2842   PetscErrorCode ierr;
2843 
2844   PetscFunctionBegin;
2845   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2846   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2847   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2848   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2849   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2850   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2851   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2852   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2853   for(r = 0; r < n; ++r) {
2854     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2855       a[r]   = diagA[r];
2856       idx[r] = cstart + diagIdx[r];
2857     } else {
2858       a[r]   = offdiagA[r];
2859       idx[r] = cmap[offdiagIdx[r]];
2860     }
2861   }
2862   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2863   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2864   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2865   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2866   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2867   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2873 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2874 {
2875   PetscErrorCode ierr;
2876   Mat            *dummy;
2877 
2878   PetscFunctionBegin;
2879   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2880   *newmat = *dummy;
2881   ierr = PetscFree(dummy);CHKERRQ(ierr);
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2886 /* -------------------------------------------------------------------*/
/*
   Function table for the MPIAIJ matrix type.  Entries are positional slots of
   struct _MatOps; the numbered markers give the slot index of the following
   entry, and a 0 entry means the operation is not provided by this type.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       MatZeroRowsColumns_MPIAIJ,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIAIJ,
       0,
       0,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       MatGetMultiProcBlock_MPIAIJ,
/*124*/MatFindNonZeroRows_MPIAIJ,
       0,
       0,
       0,
       MatGetSubMatricesParallel_MPIAIJ
};
3033 
3034 /* ----------------------------------------------------------------------------------------*/
3035 
3036 EXTERN_C_BEGIN
3037 #undef __FUNCT__
3038 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3039 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3040 {
3041   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3042   PetscErrorCode ierr;
3043 
3044   PetscFunctionBegin;
3045   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3046   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3047   PetscFunctionReturn(0);
3048 }
3049 EXTERN_C_END
3050 
3051 EXTERN_C_BEGIN
3052 #undef __FUNCT__
3053 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3054 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3055 {
3056   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3057   PetscErrorCode ierr;
3058 
3059   PetscFunctionBegin;
3060   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3061   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3062   PetscFunctionReturn(0);
3063 }
3064 EXTERN_C_END
3065 
3066 EXTERN_C_BEGIN
3067 #undef __FUNCT__
3068 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3069 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3070 {
3071   Mat_MPIAIJ     *b;
3072   PetscErrorCode ierr;
3073   PetscInt       i;
3074 
3075   PetscFunctionBegin;
3076   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3077   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3078   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3079   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3080 
3081   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3082   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3083   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3084   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3085   if (d_nnz) {
3086     for (i=0; i<B->rmap->n; i++) {
3087       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3088     }
3089   }
3090   if (o_nnz) {
3091     for (i=0; i<B->rmap->n; i++) {
3092       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3093     }
3094   }
3095   b = (Mat_MPIAIJ*)B->data;
3096 
3097   if (!B->preallocated) {
3098     /* Explicitly create 2 MATSEQAIJ matrices. */
3099     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3100     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3101     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3102     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3103     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3104     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3105     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3106     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3107   }
3108 
3109   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3110   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3111   B->preallocated = PETSC_TRUE;
3112   PetscFunctionReturn(0);
3113 }
3114 EXTERN_C_END
3115 
3116 #undef __FUNCT__
3117 #define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Duplicates an MPIAIJ matrix: creates a new Mat of the same type/sizes,
   copies the parallel bookkeeping (layouts, colmap, garray, scatter
   context), and duplicates the two sequential blocks A and B.
   cpvalues controls whether numerical values are copied (passed through
   to MatDuplicate on the blocks).
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the full function table so any overridden ops carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;  /* data was allocated by MatSetType above */

  mat->factortype    = matin->factortype;
  mat->rmap->bs      = matin->rmap->bs;
  /* mark the duplicate assembled/preallocated so it is immediately usable */
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  a->donotstash     = oldmat->donotstash;
  a->roworiented    = oldmat->roworiented;
  /* per-call MatGetRow() scratch space is not copied; rebuilt on demand */
  a->rowindices     = 0;
  a->rowvalues      = 0;
  a->getrowactive   = PETSC_FALSE;

  /* replaces the layouts created by MatSetSizes with exact copies (incl. bs) */
  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* colmap: global column -> local column of the B (off-diagonal) block */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray: global column indices of the compressed B block columns */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* lvec + Mvctx: the ghost-value vector and scatter used by MatMult */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3178 
3179 /*
3180     Allows sending/receiving larger messages then 2 gigabytes in a single call
3181 */
3182 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3183 {
3184   int             ierr;
3185   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3186   PetscInt        i,numchunks;
3187   PetscMPIInt     icnt;
3188 
3189   numchunks = cnt/CHUNKSIZE + 1;
3190   for (i=0; i<numchunks; i++) {
3191     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3192     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
3193     if (type == MPIU_INT) {
3194       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3195     } else if (type == MPIU_SCALAR) {
3196       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3197     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3198   }
3199   return 0;
3200 }
3201 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3202 {
3203   int             ierr;
3204   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3205   MPI_Status      status;
3206   PetscInt        i,numchunks;
3207   PetscMPIInt     icnt;
3208 
3209   numchunks = cnt/CHUNKSIZE + 1;
3210   for (i=0; i<numchunks; i++) {
3211     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3212     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
3213     if (type == MPIU_INT) {
3214       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3215     } else if (type == MPIU_SCALAR) {
3216       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3217     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3218   }
3219   return 0;
3220 }
3221 
3222 #undef __FUNCT__
3223 #define __FUNCT__ "MatLoad_MPIAIJ"
3224 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3225 {
3226   PetscScalar    *vals,*svals;
3227   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3228   PetscErrorCode ierr;
3229   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3230   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3231   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3232   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3233   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3234   int            fd;
3235 
3236   PetscFunctionBegin;
3237   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3238   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3239   if (!rank) {
3240     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3241     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3242     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3243   }
3244 
3245   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3246 
3247   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3248   M = header[1]; N = header[2];
3249   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3250   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3251   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3252 
3253   /* If global sizes are set, check if they are consistent with that given in the file */
3254   if (sizesset) {
3255     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3256   }
3257   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3258   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3259 
3260   /* determine ownership of all rows */
3261   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3262   else m = newMat->rmap->n; /* Set by user */
3263 
3264   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3265   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3266 
3267   /* First process needs enough room for process with most rows */
3268   if (!rank) {
3269     mmax       = rowners[1];
3270     for (i=2; i<size; i++) {
3271       mmax = PetscMax(mmax,rowners[i]);
3272     }
3273   } else mmax = m;
3274 
3275   rowners[0] = 0;
3276   for (i=2; i<=size; i++) {
3277     rowners[i] += rowners[i-1];
3278   }
3279   rstart = rowners[rank];
3280   rend   = rowners[rank+1];
3281 
3282   /* distribute row lengths to all processors */
3283   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3284   if (!rank) {
3285     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3286     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3287     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3288     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3289     for (j=0; j<m; j++) {
3290       procsnz[0] += ourlens[j];
3291     }
3292     for (i=1; i<size; i++) {
3293       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3294       /* calculate the number of nonzeros on each processor */
3295       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3296         procsnz[i] += rowlengths[j];
3297       }
3298       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3299     }
3300     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3301   } else {
3302     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3303   }
3304 
3305   if (!rank) {
3306     /* determine max buffer needed and allocate it */
3307     maxnz = 0;
3308     for (i=0; i<size; i++) {
3309       maxnz = PetscMax(maxnz,procsnz[i]);
3310     }
3311     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3312 
3313     /* read in my part of the matrix column indices  */
3314     nz   = procsnz[0];
3315     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3316     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3317 
3318     /* read in every one elses and ship off */
3319     for (i=1; i<size; i++) {
3320       nz     = procsnz[i];
3321       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3322       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3323     }
3324     ierr = PetscFree(cols);CHKERRQ(ierr);
3325   } else {
3326     /* determine buffer space needed for message */
3327     nz = 0;
3328     for (i=0; i<m; i++) {
3329       nz += ourlens[i];
3330     }
3331     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3332 
3333     /* receive message of column indices*/
3334     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3335   }
3336 
3337   /* determine column ownership if matrix is not square */
3338   if (N != M) {
3339     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3340     else n = newMat->cmap->n;
3341     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3342     cstart = cend - n;
3343   } else {
3344     cstart = rstart;
3345     cend   = rend;
3346     n      = cend - cstart;
3347   }
3348 
3349   /* loop over local rows, determining number of off diagonal entries */
3350   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3351   jj = 0;
3352   for (i=0; i<m; i++) {
3353     for (j=0; j<ourlens[i]; j++) {
3354       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3355       jj++;
3356     }
3357   }
3358 
3359   for (i=0; i<m; i++) {
3360     ourlens[i] -= offlens[i];
3361   }
3362   if (!sizesset) {
3363     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3364   }
3365   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3366 
3367   for (i=0; i<m; i++) {
3368     ourlens[i] += offlens[i];
3369   }
3370 
3371   if (!rank) {
3372     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3373 
3374     /* read in my part of the matrix numerical values  */
3375     nz   = procsnz[0];
3376     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3377 
3378     /* insert into matrix */
3379     jj      = rstart;
3380     smycols = mycols;
3381     svals   = vals;
3382     for (i=0; i<m; i++) {
3383       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3384       smycols += ourlens[i];
3385       svals   += ourlens[i];
3386       jj++;
3387     }
3388 
3389     /* read in other processors and ship out */
3390     for (i=1; i<size; i++) {
3391       nz     = procsnz[i];
3392       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3393       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3394     }
3395     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3396   } else {
3397     /* receive numeric values */
3398     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3399 
3400     /* receive message of values*/
3401     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3402 
3403     /* insert into matrix */
3404     jj      = rstart;
3405     smycols = mycols;
3406     svals   = vals;
3407     for (i=0; i<m; i++) {
3408       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3409       smycols += ourlens[i];
3410       svals   += ourlens[i];
3411       jj++;
3412     }
3413   }
3414   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3415   ierr = PetscFree(vals);CHKERRQ(ierr);
3416   ierr = PetscFree(mycols);CHKERRQ(ierr);
3417   ierr = PetscFree(rowners);CHKERRQ(ierr);
3418 
3419   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3420   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 #undef __FUNCT__
3425 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3426 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3427 {
3428   PetscErrorCode ierr;
3429   IS             iscol_local;
3430   PetscInt       csize;
3431 
3432   PetscFunctionBegin;
3433   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3434   if (call == MAT_REUSE_MATRIX) {
3435     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3436     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3437   } else {
3438     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3439   }
3440   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3441   if (call == MAT_INITIAL_MATRIX) {
3442     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3443     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3444   }
3445   PetscFunctionReturn(0);
3446 }
3447 
3448 #undef __FUNCT__
3449 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3450 /*
3451     Not great since it makes two copies of the submatrix, first an SeqAIJ
3452   in local and then by concatenating the local matrices the end result.
3453   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3454 
3455   Note: This requires a sequential iscol with all indices.
3456 */
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Step 1: get the local piece of the submatrix as a sequential matrix.
     On reuse, the sequential matrix stashed on *newmat is refilled in place. */
  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    /* free only the array of Mat pointers; Mreuse itself is kept */
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
	nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    /* single allocation: dlens is the first m entries, olens the second m */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* Step 2: walk the sequential matrix's CSR arrays row by row and insert
     each row into the parallel result at its shifted global position */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
3574 
3575 EXTERN_C_BEGIN
3576 #undef __FUNCT__
3577 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3578 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3579 {
3580   PetscInt       m,cstart, cend,j,nnz,i,d;
3581   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3582   const PetscInt *JJ;
3583   PetscScalar    *values;
3584   PetscErrorCode ierr;
3585 
3586   PetscFunctionBegin;
3587   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3588 
3589   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3590   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3591   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3592   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3593   m      = B->rmap->n;
3594   cstart = B->cmap->rstart;
3595   cend   = B->cmap->rend;
3596   rstart = B->rmap->rstart;
3597 
3598   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3599 
3600 #if defined(PETSC_USE_DEBUGGING)
3601   for (i=0; i<m; i++) {
3602     nnz     = Ii[i+1]- Ii[i];
3603     JJ      = J + Ii[i];
3604     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3605     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3606     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3607   }
3608 #endif
3609 
3610   for (i=0; i<m; i++) {
3611     nnz     = Ii[i+1]- Ii[i];
3612     JJ      = J + Ii[i];
3613     nnz_max = PetscMax(nnz_max,nnz);
3614     d       = 0;
3615     for (j=0; j<nnz; j++) {
3616       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3617     }
3618     d_nnz[i] = d;
3619     o_nnz[i] = nnz - d;
3620   }
3621   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3622   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3623 
3624   if (v) values = (PetscScalar*)v;
3625   else {
3626     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3627     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3628   }
3629 
3630   for (i=0; i<m; i++) {
3631     ii   = i + rstart;
3632     nnz  = Ii[i+1]- Ii[i];
3633     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3634   }
3635   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637 
3638   if (!v) {
3639     ierr = PetscFree(values);CHKERRQ(ierr);
3640   }
3641   PetscFunctionReturn(0);
3642 }
3643 EXTERN_C_END
3644 
3645 #undef __FUNCT__
3646 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3647 /*@
3648    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3649    (the default parallel PETSc format).
3650 
3651    Collective on MPI_Comm
3652 
3653    Input Parameters:
3654 +  B - the matrix
3655 .  i - the indices into j for the start of each local row (starts with zero)
3656 .  j - the column indices for each local row (starts with zero)
3657 -  v - optional values in the matrix
3658 
3659    Level: developer
3660 
3661    Notes:
3662        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3663      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3664      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3665 
3666        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3667 
3668        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:
3671 
3672         1 0 0
3673         2 0 3     P0
3674        -------
3675         4 5 6     P1
3676 
3677      Process0 [P0]: rows_owned=[0,1]
3678         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3681 
3682      Process1 [P1]: rows_owned=[2]
3683         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3686 
3687 .keywords: matrix, aij, compressed row, sparse, parallel
3688 
3689 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3690           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3691 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed on B under
     "MatMPIAIJSetPreallocationCSR_C" (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod is a no-op if B's type does not provide the method. */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3700 
3701 #undef __FUNCT__
3702 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3703 /*@C
3704    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3705    (the default parallel PETSc format).  For good matrix assembly performance
3706    the user should preallocate the matrix storage by setting the parameters
3707    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3708    performance can be increased by more than a factor of 50.
3709 
3710    Collective on MPI_Comm
3711 
3712    Input Parameters:
3713 +  A - the matrix
3714 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3715            (same value is used for all local rows)
3716 .  d_nnz - array containing the number of nonzeros in the various rows of the
3717            DIAGONAL portion of the local submatrix (possibly different for each row)
3718            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3719            The size of this array is equal to the number of local rows, i.e 'm'.
3720            You must leave room for the diagonal entry even if it is zero.
3721 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3722            submatrix (same value is used for all local rows).
3723 -  o_nnz - array containing the number of nonzeros in the various rows of the
3724            OFF-DIAGONAL portion of the local submatrix (possibly different for
3725            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3726            structure. The size of this array is equal to the number
3727            of local rows, i.e 'm'.
3728 
3729    If the *_nnz parameter is given then the *_nz parameter is ignored
3730 
3731    The AIJ format (also called the Yale sparse matrix format or
3732    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3733    storage.  The stored row and column indices begin with zero.
3734    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3735 
3736    The parallel matrix is partitioned such that the first m0 rows belong to
3737    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3738    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3739 
3740    The DIAGONAL portion of the local submatrix of a processor can be defined
3741    as the submatrix which is obtained by extraction the part corresponding to
3742    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3743    first row that belongs to the processor, r2 is the last row belonging to
3744    the this processor, and c1-c2 is range of indices of the local part of a
3745    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3746    common case of a square matrix, the row and column ranges are the same and
3747    the DIAGONAL part is also square. The remaining portion of the local
3748    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3749 
3750    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3751 
3752    You can call MatGetInfo() to get information on how effective the preallocation was;
3753    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3754    You can also run with the option -info and look for messages with the string
3755    malloc in them to see if additional memory allocation was needed.
3756 
3757    Example usage:
3758 
3759    Consider the following 8x8 matrix with 34 non-zero values, that is
3760    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3761    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3762    as follows:
3763 
3764 .vb
3765             1  2  0  |  0  3  0  |  0  4
3766     Proc0   0  5  6  |  7  0  0  |  8  0
3767             9  0 10  | 11  0  0  | 12  0
3768     -------------------------------------
3769            13  0 14  | 15 16 17  |  0  0
3770     Proc1   0 18  0  | 19 20 21  |  0  0
3771             0  0  0  | 22 23  0  | 24  0
3772     -------------------------------------
3773     Proc2  25 26 27  |  0  0 28  | 29  0
3774            30  0  0  | 31 32 33  |  0 34
3775 .ve
3776 
3777    This can be represented as a collection of submatrices as:
3778 
3779 .vb
3780       A B C
3781       D E F
3782       G H I
3783 .ve
3784 
3785    Where the submatrices A,B,C are owned by proc0, D,E,F are
3786    owned by proc1, G,H,I are owned by proc2.
3787 
3788    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3789    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3790    The 'M','N' parameters are 8,8, and have the same values on all procs.
3791 
3792    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3793    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3794    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3795    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3796    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3797    matrix, and [DF] as another SeqAIJ matrix.
3798 
3799    When d_nz, o_nz parameters are specified, d_nz storage elements are
3800    allocated for every row of the local diagonal submatrix, and o_nz
3801    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3802    One way to choose d_nz and o_nz is to use the max nonzeros per local
3803    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3804    In this case, the values of d_nz,o_nz are:
3805 .vb
3806      proc0 : dnz = 2, o_nz = 2
3807      proc1 : dnz = 3, o_nz = 2
3808      proc2 : dnz = 1, o_nz = 4
3809 .ve
3810    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3811    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3812    for proc2. i.e we are using 12+15+10=37 storage locations to store
3813    34 values.
3814 
3815    When d_nnz, o_nnz parameters are specified, the storage is specified
3816    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3817    In the above case the values for d_nnz,o_nnz are:
3818 .vb
3819      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3820      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3821      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3822 .ve
3823    Here the space allocated is sum of all the above values i.e 34, and
3824    hence pre-allocation is perfect.
3825 
3826    Level: intermediate
3827 
3828 .keywords: matrix, aij, compressed row, sparse, parallel
3829 
3830 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3831           MPIAIJ, MatGetInfo()
3832 @*/
PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation registered under the name
     "MatMPIAIJSetPreallocation_C"; PetscTryMethod() is a no-op for matrix
     types that do not provide it, so this is safe to call on any Mat. */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3841 
3842 #undef __FUNCT__
3843 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3844 /*@
3845      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3846          CSR format the local rows.
3847 
3848    Collective on MPI_Comm
3849 
3850    Input Parameters:
3851 +  comm - MPI communicator
3852 .  m - number of local rows (Cannot be PETSC_DECIDE)
3853 .  n - This value should be the same as the local size used in creating the
3854        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3855        calculated if N is given) For square matrices n is almost always m.
3856 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3857 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3858 .   i - row indices
3859 .   j - column indices
3860 -   a - matrix values
3861 
3862    Output Parameter:
3863 .   mat - the matrix
3864 
3865    Level: intermediate
3866 
3867    Notes:
3868        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3869      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3870      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3871 
3872        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3873 
3874        The format which is used for the sparse matrix input, is equivalent to a
3875     row-major ordering.. i.e for the following matrix, the input data expected is
3876     as shown:
3877 
3878         1 0 0
3879         2 0 3     P0
3880        -------
3881         4 5 6     P1
3882 
3883      Process0 [P0]: rows_owned=[0,1]
3884         i =  {0,1,3}  [size = nrow+1  = 2+1]
3885         j =  {0,0,2}  [size = nz = 3]
3886         v =  {1,2,3}  [size = nz = 3]
3887 
3888      Process1 [P1]: rows_owned=[2]
3889         i =  {0,3}    [size = nrow+1  = 1+1]
3890         j =  {0,1,2}  [size = nz = 3]
3891         v =  {4,5,6}  [size = nz = 3]
3892 
3893 .keywords: matrix, aij, compressed row, sparse, parallel
3894 
3895 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3896           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3897 @*/
3898 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3899 {
3900   PetscErrorCode ierr;
3901 
3902  PetscFunctionBegin;
3903   if (i[0]) {
3904     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3905   }
3906   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3907   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3908   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3909   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3910   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3911   PetscFunctionReturn(0);
3912 }
3913 
3914 #undef __FUNCT__
3915 #define __FUNCT__ "MatCreateMPIAIJ"
3916 /*@C
3917    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3918    (the default parallel PETSc format).  For good matrix assembly performance
3919    the user should preallocate the matrix storage by setting the parameters
3920    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3921    performance can be increased by more than a factor of 50.
3922 
3923    Collective on MPI_Comm
3924 
3925    Input Parameters:
3926 +  comm - MPI communicator
3927 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3928            This value should be the same as the local size used in creating the
3929            y vector for the matrix-vector product y = Ax.
3930 .  n - This value should be the same as the local size used in creating the
3931        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3932        calculated if N is given) For square matrices n is almost always m.
3933 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3934 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3935 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3936            (same value is used for all local rows)
3937 .  d_nnz - array containing the number of nonzeros in the various rows of the
3938            DIAGONAL portion of the local submatrix (possibly different for each row)
3939            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3940            The size of this array is equal to the number of local rows, i.e 'm'.
3941            You must leave room for the diagonal entry even if it is zero.
3942 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3943            submatrix (same value is used for all local rows).
3944 -  o_nnz - array containing the number of nonzeros in the various rows of the
3945            OFF-DIAGONAL portion of the local submatrix (possibly different for
3946            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3947            structure. The size of this array is equal to the number
3948            of local rows, i.e 'm'.
3949 
3950    Output Parameter:
3951 .  A - the matrix
3952 
3953    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3954    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3955    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3956 
3957    Notes:
3958    If the *_nnz parameter is given then the *_nz parameter is ignored
3959 
3960    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3961    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3962    storage requirements for this matrix.
3963 
3964    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3965    processor than it must be used on all processors that share the object for
3966    that argument.
3967 
3968    The user MUST specify either the local or global matrix dimensions
3969    (possibly both).
3970 
3971    The parallel matrix is partitioned across processors such that the
3972    first m0 rows belong to process 0, the next m1 rows belong to
3973    process 1, the next m2 rows belong to process 2 etc.. where
3974    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3975    values corresponding to [m x N] submatrix.
3976 
3977    The columns are logically partitioned with the n0 columns belonging
3978    to 0th partition, the next n1 columns belonging to the next
3979    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3980 
3981    The DIAGONAL portion of the local submatrix on any given processor
3982    is the submatrix corresponding to the rows and columns m,n
3983    corresponding to the given processor. i.e diagonal matrix on
3984    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3985    etc. The remaining portion of the local submatrix [m x (N-n)]
3986    constitute the OFF-DIAGONAL portion. The example below better
3987    illustrates this concept.
3988 
3989    For a square global matrix we define each processor's diagonal portion
3990    to be its local rows and the corresponding columns (a square submatrix);
3991    each processor's off-diagonal portion encompasses the remainder of the
3992    local matrix (a rectangular submatrix).
3993 
3994    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3995 
3996    When calling this routine with a single process communicator, a matrix of
3997    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3998    type of communicator, use the construction mechanism:
3999      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4000 
4001    By default, this format uses inodes (identical nodes) when possible.
4002    We search for consecutive rows with the same nonzero structure, thereby
4003    reusing matrix information to achieve increased efficiency.
4004 
4005    Options Database Keys:
4006 +  -mat_no_inode  - Do not use inodes
4007 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4008 -  -mat_aij_oneindex - Internally use indexing starting at 1
4009         rather than 0.  Note that when calling MatSetValues(),
4010         the user still MUST index entries starting at 0!
4011 
4012 
4013    Example usage:
4014 
4015    Consider the following 8x8 matrix with 34 non-zero values, that is
4016    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4017    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4018    as follows:
4019 
4020 .vb
4021             1  2  0  |  0  3  0  |  0  4
4022     Proc0   0  5  6  |  7  0  0  |  8  0
4023             9  0 10  | 11  0  0  | 12  0
4024     -------------------------------------
4025            13  0 14  | 15 16 17  |  0  0
4026     Proc1   0 18  0  | 19 20 21  |  0  0
4027             0  0  0  | 22 23  0  | 24  0
4028     -------------------------------------
4029     Proc2  25 26 27  |  0  0 28  | 29  0
4030            30  0  0  | 31 32 33  |  0 34
4031 .ve
4032 
4033    This can be represented as a collection of submatrices as:
4034 
4035 .vb
4036       A B C
4037       D E F
4038       G H I
4039 .ve
4040 
4041    Where the submatrices A,B,C are owned by proc0, D,E,F are
4042    owned by proc1, G,H,I are owned by proc2.
4043 
4044    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4045    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4046    The 'M','N' parameters are 8,8, and have the same values on all procs.
4047 
4048    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4049    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4050    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4051    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4052    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4053    matrix, and [DF] as another SeqAIJ matrix.
4054 
4055    When d_nz, o_nz parameters are specified, d_nz storage elements are
4056    allocated for every row of the local diagonal submatrix, and o_nz
4057    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4058    One way to choose d_nz and o_nz is to use the max nonzeros per local
4059    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4060    In this case, the values of d_nz,o_nz are:
4061 .vb
4062      proc0 : dnz = 2, o_nz = 2
4063      proc1 : dnz = 3, o_nz = 2
4064      proc2 : dnz = 1, o_nz = 4
4065 .ve
4066    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4067    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4068    for proc2. i.e we are using 12+15+10=37 storage locations to store
4069    34 values.
4070 
4071    When d_nnz, o_nnz parameters are specified, the storage is specified
4072    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4073    In the above case the values for d_nnz,o_nnz are:
4074 .vb
4075      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4076      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4077      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4078 .ve
4079    Here the space allocated is sum of all the above values i.e 34, and
4080    hence pre-allocation is perfect.
4081 
4082    Level: intermediate
4083 
4084 .keywords: matrix, aij, compressed row, sparse, parallel
4085 
4086 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4087           MPIAIJ, MatCreateMPIAIJWithArrays()
4088 @*/
4089 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4090 {
4091   PetscErrorCode ierr;
4092   PetscMPIInt    size;
4093 
4094   PetscFunctionBegin;
4095   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4096   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4097   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4098   if (size > 1) {
4099     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4100     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4101   } else {
4102     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4103     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4104   }
4105   PetscFunctionReturn(0);
4106 }
4107 
4108 #undef __FUNCT__
4109 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4110 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4111 {
4112   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4113 
4114   PetscFunctionBegin;
4115   *Ad     = a->A;
4116   *Ao     = a->B;
4117   *colmap = a->garray;
4118   PetscFunctionReturn(0);
4119 }
4120 
4121 #undef __FUNCT__
4122 #define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Attaches a column coloring to both sequential blocks
   (diagonal a->A and off-diagonal a->B) of an MPIAIJ matrix, translating the
   coloring into the local column numbering each block uses.

   Input Parameters:
+  A        - the MPIAIJ matrix
-  coloring - an ISColoring of ctype IS_COLORING_GLOBAL (colors indexed by
              global column) or IS_COLORING_GHOSTED (colors indexed by
              local/ghosted column); any other ctype is an error
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: the global coloring can be used directly */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the complete global color
       array, then select the colors of the off-process columns stored locally
       (a->garray[i] is the global column of local off-diagonal column i) */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* ocoloring takes ownership of colors */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map each owned global column through
       A->cmapping into the ghosted local numbering, then look up its color */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: same mapping, but the global
       column indices come from garray */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);

  PetscFunctionReturn(0);
}
4182 
4183 #if defined(PETSC_HAVE_ADIC)
4184 #undef __FUNCT__
4185 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4186 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4187 {
4188   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4189   PetscErrorCode ierr;
4190 
4191   PetscFunctionBegin;
4192   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4193   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4194   PetscFunctionReturn(0);
4195 }
4196 #endif
4197 
4198 #undef __FUNCT__
4199 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4200 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4201 {
4202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4203   PetscErrorCode ierr;
4204 
4205   PetscFunctionBegin;
4206   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4207   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4208   PetscFunctionReturn(0);
4209 }
4210 
4211 #undef __FUNCT__
4212 #define __FUNCT__ "MatMerge"
4213 /*@
4214       MatMerge - Creates a single large PETSc matrix by concatinating sequential
4215                  matrices from each processor
4216 
4217     Collective on MPI_Comm
4218 
4219    Input Parameters:
4220 +    comm - the communicators the parallel matrix will live on
4221 .    inmat - the input sequential matrices
4222 .    n - number of local columns (or PETSC_DECIDE)
4223 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4224 
4225    Output Parameter:
4226 .    outmat - the parallel matrix generated
4227 
4228     Level: advanced
4229 
4230    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4231 
4232 @*/
/*
   Implementation of MatMerge(): stacks each rank's sequential matrix (by row)
   into one parallel MPIAIJ matrix.  NOTE: the input matrix inmat is DESTROYED
   once its values have been copied into *outmat.
   NOTE(review): rows are accessed via MatGetRow_SeqAIJ(), so inmat is presumably
   required to be of type SEQAIJ — confirm against callers.
*/
PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* prefix sum of the local row counts gives this rank's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    /* reusing an existing parallel matrix: only its row offset is needed */
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy local rows of inmat into rows rstart..rstart+m-1 of *outmat */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii    = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* the input matrix is consumed by this routine */
  ierr = MatDestroy(&inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
4281 
4282 #undef __FUNCT__
4283 #define __FUNCT__ "MatFileSplit"
4284 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4285 {
4286   PetscErrorCode    ierr;
4287   PetscMPIInt       rank;
4288   PetscInt          m,N,i,rstart,nnz;
4289   size_t            len;
4290   const PetscInt    *indx;
4291   PetscViewer       out;
4292   char              *name;
4293   Mat               B;
4294   const PetscScalar *values;
4295 
4296   PetscFunctionBegin;
4297   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4298   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4299   /* Should this be the type of the diagonal block of A? */
4300   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4301   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4302   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4303   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4304   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4305   for (i=0;i<m;i++) {
4306     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4307     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4308     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4309   }
4310   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4311   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4312 
4313   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4314   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4315   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4316   sprintf(name,"%s.%d",outfile,rank);
4317   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4318   ierr = PetscFree(name);
4319   ierr = MatView(B,out);CHKERRQ(ierr);
4320   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4321   ierr = MatDestroy(&B);CHKERRQ(ierr);
4322   PetscFunctionReturn(0);
4323 }
4324 
4325 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4326 #undef __FUNCT__
4327 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4328 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4329 {
4330   PetscErrorCode       ierr;
4331   Mat_Merge_SeqsToMPI  *merge;
4332   PetscContainer       container;
4333 
4334   PetscFunctionBegin;
4335   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4336   if (container) {
4337     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4338     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4339     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4340     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4341     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4342     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4343     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4344     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4345     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4346     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4347     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4348     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4349     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4350     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4351 
4352     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4353     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4354   }
4355   ierr = PetscFree(merge);CHKERRQ(ierr);
4356 
4357   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4358   PetscFunctionReturn(0);
4359 }
4360 
4361 #include <../src/mat/utils/freespace.h>
4362 #include <petscbt.h>
4363 
4364 #undef __FUNCT__
4365 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4366 /*@C
4367       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4368                  matrices from each processor
4369 
4370     Collective on MPI_Comm
4371 
4372    Input Parameters:
4373 +    comm - the communicators the parallel matrix will live on
4374 .    seqmat - the input sequential matrices
4375 .    m - number of local rows (or PETSC_DECIDE)
4376 .    n - number of local columns (or PETSC_DECIDE)
4377 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4378 
4379    Output Parameter:
4380 .    mpimat - the parallel matrix generated
4381 
4382     Level: advanced
4383 
4384    Notes:
4385      The dimensions of the sequential matrix in each processor MUST be the same.
4386      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4387      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4388 @*/
4389 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4390 {
4391   PetscErrorCode       ierr;
4392   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4393   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4394   PetscMPIInt          size,rank,taga,*len_s;
4395   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4396   PetscInt             proc,m;
4397   PetscInt             **buf_ri,**buf_rj;
4398   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4399   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4400   MPI_Request          *s_waits,*r_waits;
4401   MPI_Status           *status;
4402   MatScalar            *aa=a->a;
4403   MatScalar            **abuf_r,*ba_i;
4404   Mat_Merge_SeqsToMPI  *merge;
4405   PetscContainer       container;
4406 
4407   PetscFunctionBegin;
4408   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4409 
4410   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4411   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4412 
4413   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4414   if (container) {
4415     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4416   }
4417   bi     = merge->bi;
4418   bj     = merge->bj;
4419   buf_ri = merge->buf_ri;
4420   buf_rj = merge->buf_rj;
4421 
4422   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4423   owners = merge->rowmap->range;
4424   len_s  = merge->len_s;
4425 
4426   /* send and recv matrix values */
4427   /*-----------------------------*/
4428   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4429   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4430 
4431   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4432   for (proc=0,k=0; proc<size; proc++){
4433     if (!len_s[proc]) continue;
4434     i = owners[proc];
4435     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4436     k++;
4437   }
4438 
4439   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4440   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4441   ierr = PetscFree(status);CHKERRQ(ierr);
4442 
4443   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4444   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4445 
4446   /* insert mat values of mpimat */
4447   /*----------------------------*/
4448   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4449   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4450 
4451   for (k=0; k<merge->nrecv; k++){
4452     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4453     nrows = *(buf_ri_k[k]);
4454     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4455     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4456   }
4457 
4458   /* set values of ba */
4459   m = merge->rowmap->n;
4460   for (i=0; i<m; i++) {
4461     arow = owners[rank] + i;
4462     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4463     bnzi = bi[i+1] - bi[i];
4464     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4465 
4466     /* add local non-zero vals of this proc's seqmat into ba */
4467     anzi = ai[arow+1] - ai[arow];
4468     aj   = a->j + ai[arow];
4469     aa   = a->a + ai[arow];
4470     nextaj = 0;
4471     for (j=0; nextaj<anzi; j++){
4472       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4473         ba_i[j] += aa[nextaj++];
4474       }
4475     }
4476 
4477     /* add received vals into ba */
4478     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4479       /* i-th row */
4480       if (i == *nextrow[k]) {
4481         anzi = *(nextai[k]+1) - *nextai[k];
4482         aj   = buf_rj[k] + *(nextai[k]);
4483         aa   = abuf_r[k] + *(nextai[k]);
4484         nextaj = 0;
4485         for (j=0; nextaj<anzi; j++){
4486           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4487             ba_i[j] += aa[nextaj++];
4488           }
4489         }
4490         nextrow[k]++; nextai[k]++;
4491       }
4492     }
4493     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4494   }
4495   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4496   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4497 
4498   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4499   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4500   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4501   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4502   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4503   PetscFunctionReturn(0);
4504 }
4505 
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
/*
   MatMerge_SeqsToMPISymbolic - Symbolic phase of merging per-process SeqAIJ
   matrices into one parallel MPIAIJ matrix.

   Each process owns m rows of the result; rows of seqmat that belong to other
   processes are shipped there.  This routine exchanges only the ij-structure
   (row indices and column indices), computes the nonzero pattern and
   preallocation of the merged matrix, and attaches a Mat_Merge_SeqsToMPI
   container to the result so that MatMerge_SeqsToMPINumeric() can later fill
   in the values without re-communicating the structure.

   Input:  comm   - communicator for the merged matrix
           seqmat - this process's sequential matrix (all global columns)
           m, n   - local row/column sizes of the merged matrix (or PETSC_DECIDE)
   Output: mpimat - the merged matrix, structure only; not yet assembled
*/
PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt             len,proc,*dnz,*onz;
  PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s  = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* only rows with at least one nonzero are described in the i-structure message */
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    /* column indices of all rows owned by [proc] are contiguous in aj */
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1] = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list for merging sorted column-index sets */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi   = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow   = owners[rank] + i;
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  /* flatten the free-space chunks into the final contiguous bj array */
  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled     = PETSC_FALSE;
  B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi            = bi;
  merge->bj            = bj;
  merge->buf_ri        = buf_ri;
  merge->buf_rj        = buf_rj;
  merge->coi           = PETSC_NULL;
  merge->coj           = PETSC_NULL;
  merge->owners_co     = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4754 
4755 #undef __FUNCT__
4756 #define __FUNCT__ "MatMerge_SeqsToMPI"
4757 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4758 {
4759   PetscErrorCode   ierr;
4760 
4761   PetscFunctionBegin;
4762   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4763   if (scall == MAT_INITIAL_MATRIX){
4764     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4765   }
4766   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4767   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4768   PetscFunctionReturn(0);
4769 }
4770 
4771 #undef __FUNCT__
4772 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4773 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
4777 
4778     Not Collective
4779 
4780    Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4783 
4784    Output Parameter:
4785 .    A_loc - the local sequential matrix generated
4786 
4787     Level: developer
4788 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4790 
4791 @*/
PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
  /* a = diagonal block, b = off-diagonal block of the MPIAIJ matrix */
  Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar       *aa=a->a,*ba=b->a,*cam;
  PetscScalar     *ca;
  PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool       match;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row i of the local matrix holds all entries of row i of the diagonal
       and off-diagonal blocks, so its length is the sum of both row lengths */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    /* merge each row so the global column indices come out in ascending order:
       off-diagonal columns < cstart, then the diagonal block, then the rest;
       aj/aa and bj/ba are walked forward across rows, so loop order matters */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A with global column < cstart */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (global column >= cstart+n) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure is unchanged; only recopy the values, walking in the same
       three-phase order used when the matrix was first built */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A with global column < cstart */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4869 
4870 #undef __FUNCT__
4871 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4872 /*@C
4873      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4874 
4875     Not Collective
4876 
4877    Input Parameters:
4878 +    A - the matrix
4879 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4880 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4881 
4882    Output Parameter:
4883 .    A_loc - the local sequential matrix generated
4884 
4885     Level: developer
4886 
4887 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4888 
4889 @*/
4890 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4891 {
4892   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4893   PetscErrorCode    ierr;
4894   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4895   IS                isrowa,iscola;
4896   Mat               *aloc;
4897   PetscBool       match;
4898 
4899   PetscFunctionBegin;
4900   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4901   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4902   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4903   if (!row){
4904     start = A->rmap->rstart; end = A->rmap->rend;
4905     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4906   } else {
4907     isrowa = *row;
4908   }
4909   if (!col){
4910     start = A->cmap->rstart;
4911     cmap  = a->garray;
4912     nzA   = a->A->cmap->n;
4913     nzB   = a->B->cmap->n;
4914     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4915     ncols = 0;
4916     for (i=0; i<nzB; i++) {
4917       if (cmap[i] < start) idx[ncols++] = cmap[i];
4918       else break;
4919     }
4920     imark = i;
4921     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4922     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4923     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4924   } else {
4925     iscola = *col;
4926   }
4927   if (scall != MAT_INITIAL_MATRIX){
4928     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4929     aloc[0] = *A_loc;
4930   }
4931   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4932   *A_loc = aloc[0];
4933   ierr = PetscFree(aloc);CHKERRQ(ierr);
4934   if (!row){
4935     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4936   }
4937   if (!col){
4938     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4939   }
4940   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4941   PetscFunctionReturn(0);
4942 }
4943 
4944 #undef __FUNCT__
4945 #define __FUNCT__ "MatGetBrowsOfAcols"
4946 /*@C
4947     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4948 
4949     Collective on Mat
4950 
4951    Input Parameters:
4952 +    A,B - the matrices in mpiaij format
4953 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4954 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4955 
4956    Output Parameter:
4957 +    rowb, colb - index sets of rows and columns of B to extract
4958 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4959 -    B_seq - the sequential matrix generated
4960 
4961     Level: developer
4962 
4963 @*/
4964 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4965 {
4966   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4967   PetscErrorCode    ierr;
4968   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4969   IS                isrowb,iscolb;
4970   Mat               *bseq;
4971 
4972   PetscFunctionBegin;
4973   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4974     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4975   }
4976   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4977 
4978   if (scall == MAT_INITIAL_MATRIX){
4979     start = A->cmap->rstart;
4980     cmap  = a->garray;
4981     nzA   = a->A->cmap->n;
4982     nzB   = a->B->cmap->n;
4983     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4984     ncols = 0;
4985     for (i=0; i<nzB; i++) {  /* row < local row index */
4986       if (cmap[i] < start) idx[ncols++] = cmap[i];
4987       else break;
4988     }
4989     imark = i;
4990     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4991     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4992     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4993     *brstart = imark;
4994     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4995   } else {
4996     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4997     isrowb = *rowb; iscolb = *colb;
4998     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4999     bseq[0] = *B_seq;
5000   }
5001   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5002   *B_seq = bseq[0];
5003   ierr = PetscFree(bseq);CHKERRQ(ierr);
5004   if (!rowb){
5005     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5006   } else {
5007     *rowb = isrowb;
5008   }
5009   if (!colb){
5010     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5011   } else {
5012     *colb = iscolb;
5013   }
5014   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5015   PetscFunctionReturn(0);
5016 }
5017 
5018 #undef __FUNCT__
5019 #define __FUNCT__ "MatGetBrowsOfAoCols"
5020 /*@C
5021     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5022     of the OFF-DIAGONAL portion of local A
5023 
5024     Collective on Mat
5025 
5026    Input Parameters:
5027 +    A,B - the matrices in mpiaij format
5028 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5029 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5030 .    startsj_r - similar to startsj for receives
5031 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5032 
   Output Parameter:
.    B_oth - the sequential matrix generated
5035 
5036     Level: developer
5037 
5038 @*/
5039 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5040 {
5041   VecScatter_MPI_General *gen_to,*gen_from;
5042   PetscErrorCode         ierr;
5043   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5044   Mat_SeqAIJ             *b_oth;
5045   VecScatter             ctx=a->Mvctx;
5046   MPI_Comm               comm=((PetscObject)ctx)->comm;
5047   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5048   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5049   PetscScalar            *rvalues,*svalues;
5050   MatScalar              *b_otha,*bufa,*bufA;
5051   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5052   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5053   MPI_Status             *sstatus,rstatus;
5054   PetscMPIInt            jj;
5055   PetscInt               *cols,sbs,rbs;
5056   PetscScalar            *vals;
5057 
5058   PetscFunctionBegin;
5059   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5060     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5061   }
5062   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5063   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5064 
5065   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5066   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5067   rvalues  = gen_from->values; /* holds the length of receiving row */
5068   svalues  = gen_to->values;   /* holds the length of sending row */
5069   nrecvs   = gen_from->n;
5070   nsends   = gen_to->n;
5071 
5072   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5073   srow     = gen_to->indices;   /* local row index to be sent */
5074   sstarts  = gen_to->starts;
5075   sprocs   = gen_to->procs;
5076   sstatus  = gen_to->sstatus;
5077   sbs      = gen_to->bs;
5078   rstarts  = gen_from->starts;
5079   rprocs   = gen_from->procs;
5080   rbs      = gen_from->bs;
5081 
5082   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5083   if (scall == MAT_INITIAL_MATRIX){
5084     /* i-array */
5085     /*---------*/
5086     /*  post receives */
5087     for (i=0; i<nrecvs; i++){
5088       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5089       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5090       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5091     }
5092 
5093     /* pack the outgoing message */
5094     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5095     sstartsj[0] = 0;  rstartsj[0] = 0;
5096     len = 0; /* total length of j or a array to be sent */
5097     k = 0;
5098     for (i=0; i<nsends; i++){
5099       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5100       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5101       for (j=0; j<nrows; j++) {
5102         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5103         for (l=0; l<sbs; l++){
5104           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5105           rowlen[j*sbs+l] = ncols;
5106           len += ncols;
5107           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5108         }
5109         k++;
5110       }
5111       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5112       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5113     }
5114     /* recvs and sends of i-array are completed */
5115     i = nrecvs;
5116     while (i--) {
5117       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5118     }
5119     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5120 
5121     /* allocate buffers for sending j and a arrays */
5122     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5123     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5124 
5125     /* create i-array of B_oth */
5126     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5127     b_othi[0] = 0;
5128     len = 0; /* total length of j or a array to be received */
5129     k = 0;
5130     for (i=0; i<nrecvs; i++){
5131       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5132       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5133       for (j=0; j<nrows; j++) {
5134         b_othi[k+1] = b_othi[k] + rowlen[j];
5135         len += rowlen[j]; k++;
5136       }
5137       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5138     }
5139 
5140     /* allocate space for j and a arrrays of B_oth */
5141     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5142     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5143 
5144     /* j-array */
5145     /*---------*/
5146     /*  post receives of j-array */
5147     for (i=0; i<nrecvs; i++){
5148       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5149       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5150     }
5151 
5152     /* pack the outgoing message j-array */
5153     k = 0;
5154     for (i=0; i<nsends; i++){
5155       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5156       bufJ = bufj+sstartsj[i];
5157       for (j=0; j<nrows; j++) {
5158         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5159         for (ll=0; ll<sbs; ll++){
5160           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5161           for (l=0; l<ncols; l++){
5162             *bufJ++ = cols[l];
5163           }
5164           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5165         }
5166       }
5167       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5168     }
5169 
5170     /* recvs and sends of j-array are completed */
5171     i = nrecvs;
5172     while (i--) {
5173       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5174     }
5175     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5176   } else if (scall == MAT_REUSE_MATRIX){
5177     sstartsj = *startsj;
5178     rstartsj = *startsj_r;
5179     bufa     = *bufa_ptr;
5180     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5181     b_otha   = b_oth->a;
5182   } else {
5183     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5184   }
5185 
5186   /* a-array */
5187   /*---------*/
5188   /*  post receives of a-array */
5189   for (i=0; i<nrecvs; i++){
5190     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5191     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5192   }
5193 
5194   /* pack the outgoing message a-array */
5195   k = 0;
5196   for (i=0; i<nsends; i++){
5197     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5198     bufA = bufa+sstartsj[i];
5199     for (j=0; j<nrows; j++) {
5200       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5201       for (ll=0; ll<sbs; ll++){
5202         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5203         for (l=0; l<ncols; l++){
5204           *bufA++ = vals[l];
5205         }
5206         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5207       }
5208     }
5209     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5210   }
5211   /* recvs and sends of a-array are completed */
5212   i = nrecvs;
5213   while (i--) {
5214     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5215   }
5216   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5217   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5218 
5219   if (scall == MAT_INITIAL_MATRIX){
5220     /* put together the new matrix */
5221     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5222 
5223     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5224     /* Since these are PETSc arrays, change flags to free them as necessary. */
5225     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5226     b_oth->free_a  = PETSC_TRUE;
5227     b_oth->free_ij = PETSC_TRUE;
5228     b_oth->nonew   = 0;
5229 
5230     ierr = PetscFree(bufj);CHKERRQ(ierr);
5231     if (!startsj || !bufa_ptr){
5232       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5233       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5234     } else {
5235       *startsj   = sstartsj;
5236       *startsj_r = rstartsj;
5237       *bufa_ptr  = bufa;
5238     }
5239   }
5240   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5241   PetscFunctionReturn(0);
5242 }
5243 
5244 #undef __FUNCT__
5245 #define __FUNCT__ "MatGetCommunicationStructs"
5246 /*@C
5247   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5248 
5249   Not Collective
5250 
5251   Input Parameters:
5252 . A - The matrix in mpiaij format
5253 
  Output Parameters:
5255 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5256 . colmap - A map from global column index to local index into lvec
5257 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5258 
5259   Level: developer
5260 
5261 @*/
5262 #if defined (PETSC_USE_CTABLE)
5263 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5264 #else
5265 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5266 #endif
5267 {
5268   Mat_MPIAIJ *a;
5269 
5270   PetscFunctionBegin;
5271   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5272   PetscValidPointer(lvec, 2);
5273   PetscValidPointer(colmap, 3);
5274   PetscValidPointer(multScatter, 4);
5275   a = (Mat_MPIAIJ *) A->data;
5276   if (lvec) *lvec = a->lvec;
5277   if (colmap) *colmap = a->colmap;
5278   if (multScatter) *multScatter = a->Mvctx;
5279   PetscFunctionReturn(0);
5280 }
5281 
5282 EXTERN_C_BEGIN
5283 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5284 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5285 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5286 EXTERN_C_END
5287 
5288 #undef __FUNCT__
5289 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5290 /*
5291     Computes (B'*A')' since computing B*A directly is untenable
5292 
5293                n                       p                          p
5294         (              )       (              )         (                  )
5295       m (      A       )  *  n (       B      )   =   m (         C        )
5296         (              )       (              )         (                  )
5297 
5298 */
5299 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5300 {
5301   PetscErrorCode     ierr;
5302   Mat                At,Bt,Ct;
5303 
5304   PetscFunctionBegin;
5305   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5306   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5307   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5308   ierr = MatDestroy(&At);CHKERRQ(ierr);
5309   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5310   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5311   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5312   PetscFunctionReturn(0);
5313 }
5314 
5315 #undef __FUNCT__
5316 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5317 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5318 {
5319   PetscErrorCode ierr;
5320   PetscInt       m=A->rmap->n,n=B->cmap->n;
5321   Mat            Cmat;
5322 
5323   PetscFunctionBegin;
5324   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5325   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5326   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5327   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5328   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5329   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5330   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5331   *C   = Cmat;
5332   PetscFunctionReturn(0);
5333 }
5334 
5335 /* ----------------------------------------------------------------*/
5336 #undef __FUNCT__
5337 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5338 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5339 {
5340   PetscErrorCode ierr;
5341 
5342   PetscFunctionBegin;
5343   if (scall == MAT_INITIAL_MATRIX){
5344     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5345   }
5346   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5347   PetscFunctionReturn(0);
5348 }
5349 
5350 EXTERN_C_BEGIN
5351 #if defined(PETSC_HAVE_MUMPS)
5352 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5353 #endif
5354 #if defined(PETSC_HAVE_PASTIX)
5355 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5356 #endif
5357 #if defined(PETSC_HAVE_SUPERLU_DIST)
5358 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5359 #endif
5360 #if defined(PETSC_HAVE_SPOOLES)
5361 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5362 #endif
5363 EXTERN_C_END
5364 
5365 /*MC
5366    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5367 
5368    Options Database Keys:
5369 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5370 
5371   Level: beginner
5372 
5373 .seealso: MatCreateMPIAIJ()
5374 M*/
5375 
5376 EXTERN_C_BEGIN
5377 #undef __FUNCT__
5378 #define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Constructor for the MATMPIAIJ matrix type.

   Allocates the Mat_MPIAIJ implementation struct, installs the shared
   MatOps_Values function table, initializes the stash used for off-process
   entries, and registers the composed (string-dispatched) operations plus any
   external-package factorizations this build was configured with.

   Note: no preallocation happens here; that is deferred to
   MatMPIAIJSetPreallocation()/MatMPIAIJSetPreallocationCSR().
*/
PetscErrorCode  MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  /* Allocate implementation data and install the MPIAIJ operations table */
  ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs     = 1;
  B->assembled    = PETSC_FALSE;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;   /* global-to-local column map for B block; built lazily */
  b->garray      = 0;   /* global column indices of off-diagonal part; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;   /* local work vector gathering off-process x values */
  b->Mvctx     = PETSC_NULL;   /* scatter context filling lvec */

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Register external direct solvers available in this build */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_aij_mumps",
                                     MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
					   "MatGetFactor_mpiaij_pastix",
					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* Composed operations looked up by string name elsewhere in PETSc */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
				     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
				     "MatIsTranspose_MPIAIJ",
				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
				     "MatMPIAIJSetPreallocation_MPIAIJ",
				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
				     "MatDiagonalScaleLocal_MPIAIJ",
				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* Conversions to sibling formats */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
                                     "MatConvert_MPIAIJ_MPIAIJPERM",
                                      MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
                                     "MatConvert_MPIAIJ_MPIAIJCRL",
                                      MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                     "MatConvert_MPIAIJ_MPISBAIJ",
                                      MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  /* MatMatMult support with a dense left operand */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                      MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                     MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                      MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5476 EXTERN_C_END
5477 
5478 #undef __FUNCT__
5479 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5480 /*@
5481      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5482          and "off-diagonal" part of the matrix in CSR format.
5483 
5484    Collective on MPI_Comm
5485 
5486    Input Parameters:
5487 +  comm - MPI communicator
5488 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5494 .   i - row indices for "diagonal" portion of matrix
5495 .   j - column indices
5496 .   a - matrix values
5497 .   oi - row indices for "off-diagonal" portion of matrix
5498 .   oj - column indices
5499 -   oa - matrix values
5500 
5501    Output Parameter:
5502 .   mat - the matrix
5503 
5504    Level: advanced
5505 
5506    Notes:
5507        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5508        must free the arrays once the matrix has been destroyed and not before.
5509 
5510        The i and j indices are 0 based
5511 
5512        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5513 
5514        This sets local rows and cannot be used to set off-processor values.
5515 
5516        You cannot later use MatSetValues() to change values in this matrix.
5517 
5518 .keywords: matrix, aij, compressed row, sparse, parallel
5519 
5520 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5521           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5522 @*/
5523 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5524 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5525 {
5526   PetscErrorCode ierr;
5527   Mat_MPIAIJ     *maij;
5528 
5529  PetscFunctionBegin;
5530   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5531   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5532   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5533   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5534   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5535   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5536   maij = (Mat_MPIAIJ*) (*mat)->data;
5537   maij->donotstash     = PETSC_TRUE;
5538   (*mat)->preallocated = PETSC_TRUE;
5539 
5540   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5541   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5542   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5543   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5544 
5545   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5546   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5547 
5548   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5549   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5550   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5551   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5552 
5553   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5554   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5555   PetscFunctionReturn(0);
5556 }
5557 
5558 /*
5559     Special version for direct calls from Fortran
5560 */
5561 #include <private/fortranimpl.h>
5562 
5563 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5564 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5565 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5566 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5567 #endif
5568 
5569 /* Change these macros so can be used in void function */
5570 #undef CHKERRQ
5571 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5572 #undef SETERRQ2
5573 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5574 #undef SETERRQ
5575 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5576 
5577 EXTERN_C_BEGIN
5578 #undef __FUNCT__
5579 #define __FUNCT__ "matsetvaluesmpiaij_"
5580 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5581 {
5582   Mat             mat = *mmat;
5583   PetscInt        m = *mm, n = *mn;
5584   InsertMode      addv = *maddv;
5585   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5586   PetscScalar     value;
5587   PetscErrorCode  ierr;
5588 
5589   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5590   if (mat->insertmode == NOT_SET_VALUES) {
5591     mat->insertmode = addv;
5592   }
5593 #if defined(PETSC_USE_DEBUG)
5594   else if (mat->insertmode != addv) {
5595     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5596   }
5597 #endif
5598   {
5599   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5600   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5601   PetscBool       roworiented = aij->roworiented;
5602 
5603   /* Some Variables required in the macro */
5604   Mat             A = aij->A;
5605   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5606   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5607   MatScalar       *aa = a->a;
5608   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5609   Mat             B = aij->B;
5610   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5611   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5612   MatScalar       *ba = b->a;
5613 
5614   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5615   PetscInt        nonew = a->nonew;
5616   MatScalar       *ap1,*ap2;
5617 
5618   PetscFunctionBegin;
5619   for (i=0; i<m; i++) {
5620     if (im[i] < 0) continue;
5621 #if defined(PETSC_USE_DEBUG)
5622     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5623 #endif
5624     if (im[i] >= rstart && im[i] < rend) {
5625       row      = im[i] - rstart;
5626       lastcol1 = -1;
5627       rp1      = aj + ai[row];
5628       ap1      = aa + ai[row];
5629       rmax1    = aimax[row];
5630       nrow1    = ailen[row];
5631       low1     = 0;
5632       high1    = nrow1;
5633       lastcol2 = -1;
5634       rp2      = bj + bi[row];
5635       ap2      = ba + bi[row];
5636       rmax2    = bimax[row];
5637       nrow2    = bilen[row];
5638       low2     = 0;
5639       high2    = nrow2;
5640 
5641       for (j=0; j<n; j++) {
5642         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5643         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5644         if (in[j] >= cstart && in[j] < cend){
5645           col = in[j] - cstart;
5646           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5647         } else if (in[j] < 0) continue;
5648 #if defined(PETSC_USE_DEBUG)
5649         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5650 #endif
5651         else {
5652           if (mat->was_assembled) {
5653             if (!aij->colmap) {
5654               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5655             }
5656 #if defined (PETSC_USE_CTABLE)
5657             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5658 	    col--;
5659 #else
5660             col = aij->colmap[in[j]] - 1;
5661 #endif
5662             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5663               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5664               col =  in[j];
5665               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5666               B = aij->B;
5667               b = (Mat_SeqAIJ*)B->data;
5668               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5669               rp2      = bj + bi[row];
5670               ap2      = ba + bi[row];
5671               rmax2    = bimax[row];
5672               nrow2    = bilen[row];
5673               low2     = 0;
5674               high2    = nrow2;
5675               bm       = aij->B->rmap->n;
5676               ba = b->a;
5677             }
5678           } else col = in[j];
5679           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5680         }
5681       }
5682     } else {
5683       if (!aij->donotstash) {
5684         if (roworiented) {
5685           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5686         } else {
5687           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5688         }
5689       }
5690     }
5691   }}
5692   PetscFunctionReturnVoid();
5693 }
5694 EXTERN_C_END
5695 
5696