xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fcb1c9af1a39adbb75e1011fa083d9d197867fec)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
7 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
8 {
9   PetscErrorCode  ierr;
10   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
11   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
12   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
13   const PetscInt  *ia,*ib;
14   const MatScalar *aa,*bb;
15   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
16   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
17 
18   PetscFunctionBegin;
19   *keptrows = 0;
20   ia = a->i;
21   ib = b->i;
22   for (i=0; i<m; i++) {
23     na = ia[i+1] - ia[i];
24     nb = ib[i+1] - ib[i];
25     if (!na && !nb) {
26       cnt++;
27       goto ok1;
28     }
29     aa = a->a + ia[i];
30     for (j=0; j<na; j++) {
31       if (aa[j] != 0.0) goto ok1;
32     }
33     bb = b->a + ib[i];
34     for (j=0; j <nb; j++) {
35       if (bb[j] != 0.0) goto ok1;
36     }
37     cnt++;
38     ok1:;
39   }
40   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
41   if (!n0rows) PetscFunctionReturn(0);
42   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
43   cnt = 0;
44   for (i=0; i<m; i++) {
45     na = ia[i+1] - ia[i];
46     nb = ib[i+1] - ib[i];
47     if (!na && !nb) continue;
48     aa = a->a + ia[i];
49     for(j=0; j<na;j++) {
50       if (aa[j] != 0.0) {
51         rows[cnt++] = rstart + i;
52         goto ok2;
53       }
54     }
55     bb = b->a + ib[i];
56     for (j=0; j<nb; j++) {
57       if (bb[j] != 0.0) {
58         rows[cnt++] = rstart + i;
59         goto ok2;
60       }
61     }
62     ok2:;
63   }
64   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
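
/*
   Usage sketch (hedged; the calling code below is illustrative, not part of PETSc, and it
   assumes M lives on PETSC_COMM_WORLD): the routine keeps every row that has at least one
   stored entry with a nonzero value in either the diagonal (A) or off-diagonal (B) part.
   If no process has an all-zero row, *keptrows is left as 0 (NULL) and no index set is created.

     IS keptrows;
     ierr = MatFindNonZeroRows_MPIAIJ(M,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/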
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "MatDistribute_MPIAIJ"
70 /*
71     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
72     MatLoad_MPIAIJ(); horrible lack of reuse. This should be a routine provided by each matrix type.
73 
74     Only for square matrices
75 */
76 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
77 {
78   PetscMPIInt    rank,size;
79   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
80   PetscErrorCode ierr;
81   Mat            mat;
82   Mat_SeqAIJ     *gmata;
83   PetscMPIInt    tag;
84   MPI_Status     status;
85   PetscBool      aij;
86   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
87 
88   PetscFunctionBegin;
89   CHKMEMQ;
90   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
91   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
92   if (!rank) {
93     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
94     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
95   }
96   if (reuse == MAT_INITIAL_MATRIX) {
97     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
98     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
99     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
100     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
101     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
102     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
103     rowners[0] = 0;
104     for (i=2; i<=size; i++) {
105       rowners[i] += rowners[i-1];
106     }
107     rstart = rowners[rank];
108     rend   = rowners[rank+1];
109     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
110     if (!rank) {
111       gmata = (Mat_SeqAIJ*) gmat->data;
112       /* send row lengths to all processors */
113       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
114       for (i=1; i<size; i++) {
115 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
116       }
117       /* determine the number of diagonal and off-diagonal nonzeros in each row */
118       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
119       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
120       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
121       jj = 0;
122       for (i=0; i<m; i++) {
123 	for (j=0; j<dlens[i]; j++) {
124           if (gmata->j[jj] < rstart) ld[i]++;
125 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
126 	  jj++;
127 	}
128       }
129       /* send column indices to other processes */
130       for (i=1; i<size; i++) {
131 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
132 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
133 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
134       }
135 
136       /* send numerical values to other processes */
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       gmataa = gmata->a;
142       gmataj = gmata->j;
143 
144     } else {
145       /* receive row lengths */
146       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
147       /* receive column indices */
148       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
149       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
150       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
151       /* determine the number of diagonal and off-diagonal nonzeros in each row */
152       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
153       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
154       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
155       jj = 0;
156       for (i=0; i<m; i++) {
157 	for (j=0; j<dlens[i]; j++) {
158           if (gmataj[jj] < rstart) ld[i]++;
159 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
160 	  jj++;
161 	}
162       }
163       /* receive numerical values */
164       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
165       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
166     }
167     /* set preallocation */
168     for (i=0; i<m; i++) {
169       dlens[i] -= olens[i];
170     }
171     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
172     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
173 
174     for (i=0; i<m; i++) {
175       dlens[i] += olens[i];
176     }
177     cnt  = 0;
178     for (i=0; i<m; i++) {
179       row  = rstart + i;
180       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
181       cnt += dlens[i];
182     }
183     if (rank) {
184       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
185     }
186     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
187     ierr = PetscFree(rowners);CHKERRQ(ierr);
188     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
189     *inmat = mat;
190   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
191     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
192     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
193     mat   = *inmat;
194     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
195     if (!rank) {
196       /* send numerical values to other processes */
197       gmata = (Mat_SeqAIJ*) gmat->data;
198       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
199       gmataa = gmata->a;
200       for (i=1; i<size; i++) {
201         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
202         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
203       }
204       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
205     } else {
206       /* receive numerical values from process 0*/
207       nz   = Ad->nz + Ao->nz;
208       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
209       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
210     }
211     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
212     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
213     ad = Ad->a;
214     ao = Ao->a;
215     if (mat->rmap->n) {
216       i  = 0;
217       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
218       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
219     }
220     for (i=1; i<mat->rmap->n; i++) {
221       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
222       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
223     }
224     i--;
225     if (mat->rmap->n) {
226       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
227     }
228     if (rank) {
229       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
230     }
231   }
232   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234   CHKMEMQ;
235   PetscFunctionReturn(0);
236 }
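
/*
   Usage sketch (hedged; dmat is an illustrative name): gmat is a SeqAIJ matrix that is
   meaningful only on rank 0, and every rank passes the number of rows m it should own.
   With MAT_REUSE_MATRIX only the numerical values are re-sent from rank 0; the parallel
   nonzero structure created earlier is kept.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... change the numerical values of gmat on rank 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/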
237 
238 /*
239   Local utility routine that creates a mapping from the global column
240 number to the local number in the off-diagonal part of the local
241 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
242 a slightly higher hash table cost; without it, it is not scalable (each processor
243 has an order-N integer array) but access is fast.
244 */
245 #undef __FUNCT__
246 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
247 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
248 {
249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
250   PetscErrorCode ierr;
251   PetscInt       n = aij->B->cmap->n,i;
252 
253   PetscFunctionBegin;
254 #if defined (PETSC_USE_CTABLE)
255   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
256   for (i=0; i<n; i++){
257     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
258   }
259 #else
260   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
261   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
262   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
263   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
264 #endif
265   PetscFunctionReturn(0);
266 }
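
/*
   Lookup sketch, mirroring the convention used by MatSetValues_MPIAIJ() below (gcol and
   lcol are illustrative names): in the PETSC_USE_CTABLE case the table is keyed by the
   global column + 1 (PetscTable keys must be positive); in both cases the stored value is
   the local B column index + 1, so a looked-up value of 0 (i.e. -1 after the decrement
   below) means the global column does not occur in the off-diagonal part B on this process.

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/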
267 
268 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
269 { \
270     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
271     lastcol1 = col;\
272     while (high1-low1 > 5) { \
273       t = (low1+high1)/2; \
274       if (rp1[t] > col) high1 = t; \
275       else             low1  = t; \
276     } \
277       for (_i=low1; _i<high1; _i++) { \
278         if (rp1[_i] > col) break; \
279         if (rp1[_i] == col) { \
280           if (addv == ADD_VALUES) ap1[_i] += value;   \
281           else                    ap1[_i] = value; \
282           goto a_noinsert; \
283         } \
284       }  \
285       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
286       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
287       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
288       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
289       N = nrow1++ - 1; a->nz++; high1++; \
290       /* shift up all the later entries in this row */ \
291       for (ii=N; ii>=_i; ii--) { \
292         rp1[ii+1] = rp1[ii]; \
293         ap1[ii+1] = ap1[ii]; \
294       } \
295       rp1[_i] = col;  \
296       ap1[_i] = value;  \
297       a_noinsert: ; \
298       ailen[row] = nrow1; \
299 }
300 
301 
302 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
303 { \
304     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
305     lastcol2 = col;\
306     while (high2-low2 > 5) { \
307       t = (low2+high2)/2; \
308       if (rp2[t] > col) high2 = t; \
309       else             low2  = t; \
310     } \
311     for (_i=low2; _i<high2; _i++) {		\
312       if (rp2[_i] > col) break;			\
313       if (rp2[_i] == col) {			      \
314 	if (addv == ADD_VALUES) ap2[_i] += value;     \
315 	else                    ap2[_i] = value;      \
316 	goto b_noinsert;			      \
317       }						      \
318     }							      \
319     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
320     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
321     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
322     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
323     N = nrow2++ - 1; b->nz++; high2++;					\
324     /* shift up all the later entries in this row */			\
325     for (ii=N; ii>=_i; ii--) {						\
326       rp2[ii+1] = rp2[ii];						\
327       ap2[ii+1] = ap2[ii];						\
328     }									\
329     rp2[_i] = col;							\
330     ap2[_i] = value;							\
331     b_noinsert: ;								\
332     bilen[row] = nrow2;							\
333 }
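
/*
   Both macros above implement the same insertion into one sorted CSR row (rp = column
   indices, ap = values): a bisection narrows the search window to at most 5 entries, then a
   linear scan either updates an existing entry (ADD_VALUES adds, otherwise it overwrites)
   or shifts the tail of the row up by one slot and inserts the new column, after checking
   the ignorezeroentries and nonew options and reallocating the row if it is full.
   For example, inserting col=7 into the row [2 5 9 12] shifts 9 and 12 right and yields
   [2 5 7 9 12]; inserting col=5 simply updates the existing entry in place.
*/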
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
337 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
338 {
339   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
340   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
341   PetscErrorCode ierr;
342   PetscInt       l,*garray = mat->garray,diag;
343 
344   PetscFunctionBegin;
345   /* code only works for square matrices A */
346 
347   /* find size of row to the left of the diagonal part */
348   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
349   row  = row - diag;
350   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
351     if (garray[b->j[b->i[row]+l]] > diag) break;
352   }
353   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
354 
355   /* diagonal part */
356   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
357 
358   /* right of diagonal part */
359   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
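
/*
   The caller supplies the whole row in increasing global column order; on a process owning
   columns [cstart,cend) of this square matrix the layout of v is

     v = [ B entries with global column < cstart | all A (diagonal block) entries | B entries with global column >= cend ]

   and the three PetscMemcpy() calls above copy the left, diagonal and right segments into
   b->a and a->a.  The row argument is a global index; it is shifted by the ownership start
   before being used.
*/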
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatSetValues_MPIAIJ"
365 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
366 {
367   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
368   PetscScalar    value;
369   PetscErrorCode ierr;
370   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
371   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
372   PetscBool      roworiented = aij->roworiented;
373 
374   /* Some Variables required in the macro */
375   Mat            A = aij->A;
376   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
377   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
378   MatScalar      *aa = a->a;
379   PetscBool      ignorezeroentries = a->ignorezeroentries;
380   Mat            B = aij->B;
381   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
382   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
383   MatScalar      *ba = b->a;
384 
385   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
386   PetscInt       nonew = a->nonew;
387   MatScalar      *ap1,*ap2;
388 
389   PetscFunctionBegin;
390   if (v) PetscValidScalarPointer(v,6);
391   for (i=0; i<m; i++) {
392     if (im[i] < 0) continue;
393 #if defined(PETSC_USE_DEBUG)
394     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
395 #endif
396     if (im[i] >= rstart && im[i] < rend) {
397       row      = im[i] - rstart;
398       lastcol1 = -1;
399       rp1      = aj + ai[row];
400       ap1      = aa + ai[row];
401       rmax1    = aimax[row];
402       nrow1    = ailen[row];
403       low1     = 0;
404       high1    = nrow1;
405       lastcol2 = -1;
406       rp2      = bj + bi[row];
407       ap2      = ba + bi[row];
408       rmax2    = bimax[row];
409       nrow2    = bilen[row];
410       low2     = 0;
411       high2    = nrow2;
412 
413       for (j=0; j<n; j++) {
414         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
415         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
416         if (in[j] >= cstart && in[j] < cend){
417           col = in[j] - cstart;
418           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_DEBUG)
421         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!aij->colmap) {
426               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
430 	    col--;
431 #else
432             col = aij->colmap[in[j]] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
435               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
438               B = aij->B;
439               b = (Mat_SeqAIJ*)B->data;
440               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
441               rp2      = bj + bi[row];
442               ap2      = ba + bi[row];
443               rmax2    = bimax[row];
444               nrow2    = bilen[row];
445               low2     = 0;
446               high2    = nrow2;
447               bm       = aij->B->rmap->n;
448               ba = b->a;
449             }
450           } else col = in[j];
451           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
452         }
453       }
454     } else {
455       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
456       if (!aij->donotstash) {
457         if (roworiented) {
458           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
459         } else {
460           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
461         }
462       }
463     }
464   }
465   PetscFunctionReturn(0);
466 }
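
/*
   Sketch of the normal call sequence (hedged; the row, columns and values below are made
   up): locally owned rows are inserted directly into A or B through the macros above, while
   rows owned by another process are queued in mat->stash and only communicated during
   assembly.

     PetscInt    row = 0, cols[2] = {0,1};
     PetscScalar vals[2] = {2.0,-1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/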
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatGetValues_MPIAIJ"
470 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
471 {
472   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
473   PetscErrorCode ierr;
474   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
475   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
476 
477   PetscFunctionBegin;
478   for (i=0; i<m; i++) {
479     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
480     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
481     if (idxm[i] >= rstart && idxm[i] < rend) {
482       row = idxm[i] - rstart;
483       for (j=0; j<n; j++) {
484         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
485         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
486         if (idxn[j] >= cstart && idxn[j] < cend){
487           col = idxn[j] - cstart;
488           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
489         } else {
490           if (!aij->colmap) {
491             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
492           }
493 #if defined (PETSC_USE_CTABLE)
494           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
495           col --;
496 #else
497           col = aij->colmap[idxn[j]] - 1;
498 #endif
499           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
500           else {
501             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
502           }
503         }
504       }
505     } else {
506       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
507     }
508   }
509   PetscFunctionReturn(0);
510 }
511 
512 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
516 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
517 {
518   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
519   PetscErrorCode ierr;
520   PetscInt       nstash,reallocs;
521   InsertMode     addv;
522 
523   PetscFunctionBegin;
524   if (aij->donotstash || mat->nooffprocentries) {
525     PetscFunctionReturn(0);
526   }
527 
528   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
529   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
530   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
531   mat->insertmode = addv; /* in case this processor had no cache */
532 
533   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
534   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
535   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
541 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
542 {
543   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
544   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
545   PetscErrorCode ierr;
546   PetscMPIInt    n;
547   PetscInt       i,j,rstart,ncols,flg;
548   PetscInt       *row,*col;
549   PetscBool      other_disassembled;
550   PetscScalar    *val;
551   InsertMode     addv = mat->insertmode;
552 
553   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
554   PetscFunctionBegin;
555   if (!aij->donotstash && !mat->nooffprocentries) {
556     while (1) {
557       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
558       if (!flg) break;
559 
560       for (i=0; i<n;) {
561         /* Now identify the consecutive vals belonging to the same row */
562         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
563         if (j < n) ncols = j-i;
564         else       ncols = n-i;
565         /* Now assemble all these values with a single function call */
566         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
567         i = j;
568       }
569     }
570     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
571   }
572   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
573   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
574 
575   /* determine if any processor has disassembled; if so, we must
576      also disassemble ourselves so that we may reassemble. */
577   /*
578      if the nonzero structure of submatrix B cannot change then we know that
579      no processor disassembled, so we can skip this step
580   */
581   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
582     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
583     if (mat->was_assembled && !other_disassembled) {
584       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
585     }
586   }
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
589   }
590   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
591   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
592   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
596   aij->rowvalues = 0;
597 
598   /* used by MatAXPY() */
599   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
600   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
601 
602   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
603   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
604   PetscFunctionReturn(0);
605 }
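
/*
   Assembly summary: MatAssemblyBegin_MPIAIJ() starts the stash scatter of off-process
   entries; the loop above receives them and feeds each contiguous run of a row back through
   MatSetValues_MPIAIJ().  A and B are then assembled, a disassembled B on any process is
   detected collectively, and on the first MAT_FINAL_ASSEMBLY MatSetUpMultiply_MPIAIJ()
   builds the ghost vector lvec and the scatter Mvctx used by the matrix-vector products
   below.
*/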
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
609 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
610 {
611   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
612   PetscErrorCode ierr;
613 
614   PetscFunctionBegin;
615   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
616   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatZeroRows_MPIAIJ"
622 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
623 {
624   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
625   PetscErrorCode    ierr;
626   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
627   PetscInt          i,*owners = A->rmap->range;
628   PetscInt          *nprocs,j,idx,nsends,row;
629   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
630   PetscInt          *rvalues,count,base,slen,*source;
631   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
632   MPI_Comm          comm = ((PetscObject)A)->comm;
633   MPI_Request       *send_waits,*recv_waits;
634   MPI_Status        recv_status,*send_status;
635   const PetscScalar *xx;
636   PetscScalar       *bb;
637 #if defined(PETSC_DEBUG)
638   PetscBool      found = PETSC_FALSE;
639 #endif
640 
641   PetscFunctionBegin;
642   /*  first count number of contributors to each processor */
643   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
644   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
645   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
646   j = 0;
647   for (i=0; i<N; i++) {
648     if (lastidx > (idx = rows[i])) j = 0;
649     lastidx = idx;
650     for (; j<size; j++) {
651       if (idx >= owners[j] && idx < owners[j+1]) {
652         nprocs[2*j]++;
653         nprocs[2*j+1] = 1;
654         owner[i] = j;
655 #if defined(PETSC_DEBUG)
656         found = PETSC_TRUE;
657 #endif
658         break;
659       }
660     }
661 #if defined(PETSC_DEBUG)
662     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
663     found = PETSC_FALSE;
664 #endif
665   }
666   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
667 
668   if (A->nooffproczerorows) {
669     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
670     nrecvs = nsends;
671     nmax   = N;
672   } else {
673     /* inform other processors of number of messages and max length*/
674     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
675   }
676 
677   /* post receives:   */
678   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
679   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
680   for (i=0; i<nrecvs; i++) {
681     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
682   }
683 
684   /* do sends:
685       1) starts[i] gives the starting index in svalues for stuff going to
686          the ith processor
687   */
688   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
689   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
690   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
691   starts[0] = 0;
692   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
693   for (i=0; i<N; i++) {
694     svalues[starts[owner[i]]++] = rows[i];
695   }
696 
697   starts[0] = 0;
698   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
699   count = 0;
700   for (i=0; i<size; i++) {
701     if (nprocs[2*i+1]) {
702       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
703     }
704   }
705   ierr = PetscFree(starts);CHKERRQ(ierr);
706 
707   base = owners[rank];
708 
709   /*  wait on receives */
710   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
711   count  = nrecvs; slen = 0;
712   while (count) {
713     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
714     /* unpack receives into our local space */
715     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
716     source[imdex]  = recv_status.MPI_SOURCE;
717     lens[imdex]    = n;
718     slen          += n;
719     count--;
720   }
721   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
722 
723   /* move the data into the send scatter */
724   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
725   count = 0;
726   for (i=0; i<nrecvs; i++) {
727     values = rvalues + i*nmax;
728     for (j=0; j<lens[i]; j++) {
729       lrows[count++] = values[j] - base;
730     }
731   }
732   ierr = PetscFree(rvalues);CHKERRQ(ierr);
733   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
734   ierr = PetscFree(owner);CHKERRQ(ierr);
735   ierr = PetscFree(nprocs);CHKERRQ(ierr);
736 
737   /* fix right hand side if needed */
738   if (x && b) {
739     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
740     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
741     for (i=0; i<slen; i++) {
742       bb[lrows[i]] = diag*xx[lrows[i]];
743     }
744     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
745     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
746   }
747   /*
748         Zero the required rows. If the "diagonal block" of the matrix
749      is square and the user wishes to set the diagonal, we use separate
750      code so that MatSetValues() is not called for each diagonal entry,
751      which would allocate new memory and trigger many mallocs, slowing things down.
753   */
754   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
755   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
756   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
757     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
758   } else if (diag != 0.0) {
759     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
760     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
761       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
762 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
763     }
764     for (i = 0; i < slen; i++) {
765       row  = lrows[i] + rstart;
766       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
767     }
768     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
769     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
770   } else {
771     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
772   }
773   ierr = PetscFree(lrows);CHKERRQ(ierr);
774 
775   /* wait on sends */
776   if (nsends) {
777     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
778     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
779     ierr = PetscFree(send_status);CHKERRQ(ierr);
780   }
781   ierr = PetscFree(send_waits);CHKERRQ(ierr);
782   ierr = PetscFree(svalues);CHKERRQ(ierr);
783 
784   PetscFunctionReturn(0);
785 }
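
/*
   Usage sketch (hedged; the row list is made up): a typical Dirichlet boundary-condition
   application zeroes the listed global rows and puts diag on the diagonal; when x and b are
   supplied the routine above also sets b[row] = diag*x[row] for every zeroed local row.

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/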
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
789 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
790 {
791   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
792   PetscErrorCode    ierr;
793   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
794   PetscInt          i,*owners = A->rmap->range;
795   PetscInt          *nprocs,j,idx,nsends;
796   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
797   PetscInt          *rvalues,count,base,slen,*source;
798   PetscInt          *lens,*lrows,*values,m;
799   MPI_Comm          comm = ((PetscObject)A)->comm;
800   MPI_Request       *send_waits,*recv_waits;
801   MPI_Status        recv_status,*send_status;
802   const PetscScalar *xx;
803   PetscScalar       *bb,*mask;
804   Vec               xmask,lmask;
805   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
806   const PetscInt    *aj, *ii,*ridx;
807   PetscScalar       *aa;
808 #if defined(PETSC_DEBUG)
809   PetscBool         found = PETSC_FALSE;
810 #endif
811 
812   PetscFunctionBegin;
813   /*  first count number of contributors to each processor */
814   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
815   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
816   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
817   j = 0;
818   for (i=0; i<N; i++) {
819     if (lastidx > (idx = rows[i])) j = 0;
820     lastidx = idx;
821     for (; j<size; j++) {
822       if (idx >= owners[j] && idx < owners[j+1]) {
823         nprocs[2*j]++;
824         nprocs[2*j+1] = 1;
825         owner[i] = j;
826 #if defined(PETSC_DEBUG)
827         found = PETSC_TRUE;
828 #endif
829         break;
830       }
831     }
832 #if defined(PETSC_DEBUG)
833     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
834     found = PETSC_FALSE;
835 #endif
836   }
837   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
838 
839   /* inform other processors of number of messages and max length*/
840   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
841 
842   /* post receives:   */
843   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
844   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
845   for (i=0; i<nrecvs; i++) {
846     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
847   }
848 
849   /* do sends:
850       1) starts[i] gives the starting index in svalues for stuff going to
851          the ith processor
852   */
853   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
854   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
855   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
856   starts[0] = 0;
857   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
858   for (i=0; i<N; i++) {
859     svalues[starts[owner[i]]++] = rows[i];
860   }
861 
862   starts[0] = 0;
863   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
864   count = 0;
865   for (i=0; i<size; i++) {
866     if (nprocs[2*i+1]) {
867       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
868     }
869   }
870   ierr = PetscFree(starts);CHKERRQ(ierr);
871 
872   base = owners[rank];
873 
874   /*  wait on receives */
875   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
876   count  = nrecvs; slen = 0;
877   while (count) {
878     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
879     /* unpack receives into our local space */
880     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
881     source[imdex]  = recv_status.MPI_SOURCE;
882     lens[imdex]    = n;
883     slen          += n;
884     count--;
885   }
886   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
887 
888   /* move the data into the send scatter */
889   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
890   count = 0;
891   for (i=0; i<nrecvs; i++) {
892     values = rvalues + i*nmax;
893     for (j=0; j<lens[i]; j++) {
894       lrows[count++] = values[j] - base;
895     }
896   }
897   ierr = PetscFree(rvalues);CHKERRQ(ierr);
898   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
899   ierr = PetscFree(owner);CHKERRQ(ierr);
900   ierr = PetscFree(nprocs);CHKERRQ(ierr);
901   /* lrows are the local rows to be zeroed, slen is the number of local rows */
902 
903   /* zero diagonal part of matrix */
904   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
905 
906   /* handle off diagonal part of matrix */
907   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
908   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
909   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
910   for (i=0; i<slen; i++) {
911     bb[lrows[i]] = 1;
912   }
913   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
914   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
917   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
921   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
922 
923   /* remove zeroed rows of off diagonal matrix */
924   ii = aij->i;
925   for (i=0; i<slen; i++) {
926     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
927   }
928 
929   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
930   if (aij->compressedrow.use){
931     m    = aij->compressedrow.nrows;
932     ii   = aij->compressedrow.i;
933     ridx = aij->compressedrow.rindex;
934     for (i=0; i<m; i++){
935       n   = ii[i+1] - ii[i];
936       aj  = aij->j + ii[i];
937       aa  = aij->a + ii[i];
938 
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           bb[*ridx] -= *aa*xx[*aj];
942           *aa        = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947       ridx++;
948     }
949   } else { /* do not use compressed row format */
950     m = l->B->rmap->n;
951     for (i=0; i<m; i++) {
952       n   = ii[i+1] - ii[i];
953       aj  = aij->j + ii[i];
954       aa  = aij->a + ii[i];
955       for (j=0; j<n; j++) {
956         if (PetscAbsScalar(mask[*aj])) {
957           bb[i] -= *aa*xx[*aj];
958           *aa    = 0.0;
959         }
960         aa++;
961         aj++;
962       }
963     }
964   }
965 
966   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
967   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
968   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
970   ierr = PetscFree(lrows);CHKERRQ(ierr);
971 
972   /* wait on sends */
973   if (nsends) {
974     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
975     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
976     ierr = PetscFree(send_status);CHKERRQ(ierr);
977   }
978   ierr = PetscFree(send_waits);CHKERRQ(ierr);
979   ierr = PetscFree(svalues);CHKERRQ(ierr);
980 
981   PetscFunctionReturn(0);
982 }
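
/*
   Summary: the requested global rows are first routed to their owning processes exactly as
   in MatZeroRows_MPIAIJ().  The diagonal block is handled by MatZeroRowsColumns() on l->A;
   for the off-diagonal block B a mask vector with ones in the zeroed rows is scattered into
   the ghost layout (lmask), so each process can detect which of its B columns correspond to
   rows zeroed elsewhere, fold the eliminated coefficients times x into b, and zero them.
*/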
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMult_MPIAIJ"
986 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
987 {
988   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode ierr;
990   PetscInt       nt;
991 
992   PetscFunctionBegin;
993   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
994   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
995   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
997   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
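
/*
   The product is computed as  y_local = A*x_local + B*x_ghost : the scatter that gathers the
   ghost values of x into a->lvec is started first, the diagonal-block multiply with the
   purely local part of x overlaps that communication, and the off-diagonal contribution is
   added once the scatter has completed.
*/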
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1004 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1016 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1017 {
1018   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1019   PetscErrorCode ierr;
1020 
1021   PetscFunctionBegin;
1022   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1024   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1031 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1032 {
1033   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1034   PetscErrorCode ierr;
1035   PetscBool      merged;
1036 
1037   PetscFunctionBegin;
1038   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1039   /* do nondiagonal part */
1040   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1041   if (!merged) {
1042     /* send it on its way */
1043     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1044     /* do local part */
1045     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1046     /* receive remote parts: note this assumes the values are not actually */
1047     /* added in yy until the next line, */
1048     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1049   } else {
1050     /* do local part */
1051     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1052     /* send it on its way */
1053     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054     /* values actually were received in the Begin() but we need to call this nop */
1055     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056   }
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 EXTERN_C_BEGIN
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1063 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1064 {
1065   MPI_Comm       comm;
1066   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1067   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1068   IS             Me,Notme;
1069   PetscErrorCode ierr;
1070   PetscInt       M,N,first,last,*notme,i;
1071   PetscMPIInt    size;
1072 
1073   PetscFunctionBegin;
1074 
1075   /* Easy test: symmetric diagonal block */
1076   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1077   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1078   if (!*f) PetscFunctionReturn(0);
1079   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1081   if (size == 1) PetscFunctionReturn(0);
1082 
1083   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1084   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1085   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1086   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1087   for (i=0; i<first; i++) notme[i] = i;
1088   for (i=last; i<M; i++) notme[i-last+first] = i;
1089   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1090   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1091   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1092   Aoff = Aoffs[0];
1093   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1094   Boff = Boffs[0];
1095   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1096   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1097   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1098   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1099   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1100   ierr = PetscFree(notme);CHKERRQ(ierr);
1101    PetscFunctionReturn(0);
1102 }
1103 EXTERN_C_END
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1107 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   /* do nondiagonal part */
1114   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1115   /* send it on its way */
1116   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1117   /* do local part */
1118   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1119   /* receive remote parts */
1120   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 /*
1125   This only works correctly for square matrices where the subblock A->A is the
1126    diagonal block
1127 */
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1130 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1131 {
1132   PetscErrorCode ierr;
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134 
1135   PetscFunctionBegin;
1136   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1137   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1138   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatScale_MPIAIJ"
1144 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1145 {
1146   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1147   PetscErrorCode ierr;
1148 
1149   PetscFunctionBegin;
1150   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1151   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatDestroy_MPIAIJ"
1157 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1158 {
1159   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163 #if defined(PETSC_USE_LOG)
1164   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1165 #endif
1166   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1167   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1168   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1169   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1170 #if defined (PETSC_USE_CTABLE)
1171   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1172 #else
1173   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1174 #endif
1175   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1176   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1177   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1178   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1179   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1180   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1181 
1182   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1196 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1197 {
1198   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1199   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1200   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1201   PetscErrorCode    ierr;
1202   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1203   int               fd;
1204   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1205   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1206   PetscScalar       *column_values;
1207   PetscInt          message_count,flowcontrolcount;
1208 
1209   PetscFunctionBegin;
1210   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1211   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1212   nz   = A->nz + B->nz;
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1218     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) {
1224       rlen = PetscMax(rlen,range[i+1] - range[i]);
1225     }
1226   } else {
1227     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1228     rlen = mat->rmap->n;
1229   }
1230 
1231   /* load up the local row counts */
1232   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1233   for (i=0; i<mat->rmap->n; i++) {
1234     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1235   }
1236 
1237   /* store the row lengths to the file */
1238   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1239   if (!rank) {
1240     MPI_Status status;
1241     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     for (i=1; i<size; i++) {
1243       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1244       rlen = range[i+1] - range[i];
1245       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1246       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1247     }
1248     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1249   } else {
1250     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1251     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1255 
1256   /* load up the local column indices */
1257   nzmax = nz; /* the 0th processor needs as much space as the largest amount any processor needs */
1258   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1259   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1260   cnt  = 0;
1261   for (i=0; i<mat->rmap->n; i++) {
1262     for (j=B->i[i]; j<B->i[i+1]; j++) {
1263       if ( (col = garray[B->j[j]]) > cstart) break;
1264       column_indices[cnt++] = col;
1265     }
1266     for (k=A->i[i]; k<A->i[i+1]; k++) {
1267       column_indices[cnt++] = A->j[k] + cstart;
1268     }
1269     for (; j<B->i[i+1]; j++) {
1270       column_indices[cnt++] = garray[B->j[j]];
1271     }
1272   }
1273   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1274 
1275   /* store the column indices to the file */
1276    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1277   if (!rank) {
1278     MPI_Status status;
1279     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1280     for (i=1; i<size; i++) {
1281       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1282       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1283       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1284       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1290     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1291     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1292     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1293   }
1294   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1295 
1296   /* load up the local column values */
1297   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1298   cnt  = 0;
1299   for (i=0; i<mat->rmap->n; i++) {
1300     for (j=B->i[i]; j<B->i[i+1]; j++) {
1301       if ( garray[B->j[j]] > cstart) break;
1302       column_values[cnt++] = B->a[j];
1303     }
1304     for (k=A->i[i]; k<A->i[i+1]; k++) {
1305       column_values[cnt++] = A->a[k];
1306     }
1307     for (; j<B->i[i+1]; j++) {
1308       column_values[cnt++] = B->a[j];
1309     }
1310   }
1311   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1312 
1313   /* store the column values to the file */
1314    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1315   if (!rank) {
1316     MPI_Status status;
1317     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1318     for (i=1; i<size; i++) {
1319        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1320       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1321       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1322       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1323       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1324     }
1325     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1326   } else {
1327     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1328     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1329     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1331   }
1332   ierr = PetscFree(column_values);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
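/*
   Example usage (an illustrative sketch, not part of this source file): the binary
   writer above is normally reached through MatView() with a binary viewer, e.g.

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   Here the file name "matrix.dat" and the assembled Mat A are assumed to come from the caller.
*/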
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1338 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1339 {
1340   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1341   PetscErrorCode    ierr;
1342   PetscMPIInt       rank = aij->rank,size = aij->size;
1343   PetscBool         isdraw,iascii,isbinary;
1344   PetscViewer       sviewer;
1345   PetscViewerFormat format;
1346 
1347   PetscFunctionBegin;
1348   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1349   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1350   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1351   if (iascii) {
1352     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1353     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1354       MatInfo    info;
1355       PetscBool  inodes;
1356 
1357       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1358       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1359       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1361       if (!inodes) {
1362         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1363                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1364       } else {
1365         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1366                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1367       }
1368       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1369       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1370       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1371       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1372       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1373       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1374       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1375       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1376       PetscFunctionReturn(0);
1377     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1378       PetscInt   inodecount,inodelimit,*inodes;
1379       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1380       if (inodes) {
1381         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1382       } else {
1383         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1384       }
1385       PetscFunctionReturn(0);
1386     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1387       PetscFunctionReturn(0);
1388     }
1389   } else if (isbinary) {
1390     if (size == 1) {
1391       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1392       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1393     } else {
1394       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1395     }
1396     PetscFunctionReturn(0);
1397   } else if (isdraw) {
1398     PetscDraw  draw;
1399     PetscBool  isnull;
1400     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1401     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1402   }
1403 
1404   if (size == 1) {
1405     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1406     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1407   } else {
1408     /* assemble the entire matrix onto first processor. */
1409     Mat         A;
1410     Mat_SeqAIJ  *Aloc;
1411     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1412     MatScalar   *a;
1413 
1414     if (mat->rmap->N > 1024) {
1415       PetscBool  flg = PETSC_FALSE;
1416 
1417       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1418       if (!flg) {
1419         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1420       }
1421     }
1422 
1423     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1424     if (!rank) {
1425       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1426     } else {
1427       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1428     }
1429     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1430     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1431     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1432     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1433 
1434     /* copy over the A part */
1435     Aloc = (Mat_SeqAIJ*)aij->A->data;
1436     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1437     row = mat->rmap->rstart;
1438     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1439     for (i=0; i<m; i++) {
1440       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1441       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1442     }
1443     aj = Aloc->j;
1444     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1445 
1446     /* copy over the B part */
1447     Aloc = (Mat_SeqAIJ*)aij->B->data;
1448     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1449     row  = mat->rmap->rstart;
1450     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1451     ct   = cols;
1452     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1453     for (i=0; i<m; i++) {
1454       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1455       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1456     }
1457     ierr = PetscFree(ct);CHKERRQ(ierr);
1458     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1459     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1460     /*
1461        Everyone has to call to draw the matrix since the graphics waits are
1462        synchronized across all processors that share the PetscDraw object
1463     */
1464     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1465     if (!rank) {
1466       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1467       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1468       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1469       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1470     }
1471     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1472     ierr = MatDestroy(&A);CHKERRQ(ierr);
1473   }
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatView_MPIAIJ"
1479 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1480 {
1481   PetscErrorCode ierr;
1482   PetscBool      iascii,isdraw,issocket,isbinary;
1483 
1484   PetscFunctionBegin;
1485   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1486   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1487   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1488   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1489   if (iascii || isdraw || isbinary || issocket) {
1490     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1491   } else {
1492     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1493   }
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatSOR_MPIAIJ"
1499 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1500 {
1501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1502   PetscErrorCode ierr;
1503   Vec            bb1 = 0;
1504   PetscBool      hasop;
1505 
1506   PetscFunctionBegin;
1507   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1508     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1509   }
1510 
1511   if (flag == SOR_APPLY_UPPER) {
1512     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1513     PetscFunctionReturn(0);
1514   }
1515 
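  /*
     For the "local" sweep variants below, each outer iteration scatters the
     off-process entries of xx into mat->lvec, moves the off-diagonal contribution
     to the right-hand side (bb1 = bb - B*lvec), and then applies the sequential
     SOR kernel to the diagonal block A only; across processes this amounts to a
     block Jacobi iteration with SOR as the subdomain solver.
  */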
1516   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1517     if (flag & SOR_ZERO_INITIAL_GUESS) {
1518       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1519       its--;
1520     }
1521 
1522     while (its--) {
1523       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1525 
1526       /* update rhs: bb1 = bb - B*x */
1527       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1528       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1529 
1530       /* local sweep */
1531       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1532     }
1533   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1534     if (flag & SOR_ZERO_INITIAL_GUESS) {
1535       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1536       its--;
1537     }
1538     while (its--) {
1539       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1540       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1541 
1542       /* update rhs: bb1 = bb - B*x */
1543       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1544       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1545 
1546       /* local sweep */
1547       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1548     }
1549   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1550     if (flag & SOR_ZERO_INITIAL_GUESS) {
1551       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552       its--;
1553     }
1554     while (its--) {
1555       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557 
1558       /* update rhs: bb1 = bb - B*x */
1559       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1560       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1561 
1562       /* local sweep */
1563       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1564     }
1565   }  else if (flag & SOR_EISENSTAT) {
1566     Vec         xx1;
1567 
1568     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1569     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1570 
1571     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573     if (!mat->diag) {
1574       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1575       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1576     }
1577     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1578     if (hasop) {
1579       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1580     } else {
1581       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1582     }
1583     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1584 
1585     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1586 
1587     /* local sweep */
1588     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1589     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1590     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1591   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1592 
1593   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1594   PetscFunctionReturn(0);
1595 }
1596 
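/*
   Example usage (an illustrative sketch, not part of this source file): MatSOR() is
   normally reached through the SOR preconditioner, e.g.

      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
      ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   or via the options -pc_type sor -pc_sor_local_symmetric; the KSP ksp and PC pc
   are assumed to come from the caller.
*/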
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatPermute_MPIAIJ"
1599 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1600 {
1601   MPI_Comm       comm,pcomm;
1602   PetscInt       first,local_size,nrows;
1603   const PetscInt *rows;
1604   PetscMPIInt    size;
1605   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1606   PetscErrorCode ierr;
1607 
1608   PetscFunctionBegin;
1609   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1610   /* make a collective version of 'rowp' */
1611   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1612   if (pcomm==comm) {
1613     crowp = rowp;
1614   } else {
1615     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1616     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1617     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1618     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1619   }
1620   /* collect the global row permutation and invert it */
1621   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1622   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1623   if (pcomm!=comm) {
1624     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1625   }
1626   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1627   /* get the local target indices */
1628   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1629   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1630   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1631   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1632   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1633   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1634   /* the column permutation is so much easier;
1635      make a local version of 'colp' and invert it */
1636   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1637   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1638   if (size==1) {
1639     lcolp = colp;
1640   } else {
1641     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1642     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1643     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1644   }
1645   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1646   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1647   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1648   if (size>1) {
1649     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1650     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1651   }
1652   /* now we just get the submatrix */
1653   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1654   /* clean up */
1655   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1656   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1657   PetscFunctionReturn(0);
1658 }
1659 
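/*
   Example usage (an illustrative sketch, not part of this source file): permute the
   rows and columns of a parallel AIJ matrix with permutation index sets,

      Mat Aperm;
      ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);

   where A, rowperm, and colperm are assumed to come from the caller; rowperm and
   colperm must be permutations of the global row and column numbering.
*/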
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1662 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1663 {
1664   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1665   Mat            A = mat->A,B = mat->B;
1666   PetscErrorCode ierr;
1667   PetscReal      isend[5],irecv[5];
1668 
1669   PetscFunctionBegin;
1670   info->block_size     = 1.0;
1671   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1672   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1673   isend[3] = info->memory;  isend[4] = info->mallocs;
1674   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1675   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1676   isend[3] += info->memory;  isend[4] += info->mallocs;
1677   if (flag == MAT_LOCAL) {
1678     info->nz_used      = isend[0];
1679     info->nz_allocated = isend[1];
1680     info->nz_unneeded  = isend[2];
1681     info->memory       = isend[3];
1682     info->mallocs      = isend[4];
1683   } else if (flag == MAT_GLOBAL_MAX) {
1684     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1685     info->nz_used      = irecv[0];
1686     info->nz_allocated = irecv[1];
1687     info->nz_unneeded  = irecv[2];
1688     info->memory       = irecv[3];
1689     info->mallocs      = irecv[4];
1690   } else if (flag == MAT_GLOBAL_SUM) {
1691     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1692     info->nz_used      = irecv[0];
1693     info->nz_allocated = irecv[1];
1694     info->nz_unneeded  = irecv[2];
1695     info->memory       = irecv[3];
1696     info->mallocs      = irecv[4];
1697   }
1698   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1699   info->fill_ratio_needed = 0;
1700   info->factor_mallocs    = 0;
1701 
1702   PetscFunctionReturn(0);
1703 }
1704 
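/*
   Example usage (an illustrative sketch, not part of this source file): query global
   nonzero and memory statistics of an assembled parallel matrix,

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);

   MAT_LOCAL and MAT_GLOBAL_MAX report per-process and maximum-over-processes values instead.
*/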
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatSetOption_MPIAIJ"
1707 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1708 {
1709   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1710   PetscErrorCode ierr;
1711 
1712   PetscFunctionBegin;
1713   switch (op) {
1714   case MAT_NEW_NONZERO_LOCATIONS:
1715   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1716   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1717   case MAT_KEEP_NONZERO_PATTERN:
1718   case MAT_NEW_NONZERO_LOCATION_ERR:
1719   case MAT_USE_INODES:
1720   case MAT_IGNORE_ZERO_ENTRIES:
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_ROW_ORIENTED:
1725     a->roworiented = flg;
1726     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1727     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_NEW_DIAGONALS:
1730     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1731     break;
1732   case MAT_IGNORE_OFF_PROC_ENTRIES:
1733     a->donotstash = PETSC_TRUE;
1734     break;
1735   case MAT_SPD:
1736     A->spd_set                         = PETSC_TRUE;
1737     A->spd                             = flg;
1738     if (flg) {
1739       A->symmetric                     = PETSC_TRUE;
1740       A->structurally_symmetric        = PETSC_TRUE;
1741       A->symmetric_set                 = PETSC_TRUE;
1742       A->structurally_symmetric_set    = PETSC_TRUE;
1743     }
1744     break;
1745   case MAT_SYMMETRIC:
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   case MAT_STRUCTURALLY_SYMMETRIC:
1749     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_HERMITIAN:
1752     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1753     break;
1754   case MAT_SYMMETRY_ETERNAL:
1755     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1756     break;
1757   default:
1758     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatGetRow_MPIAIJ"
1765 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1766 {
1767   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1768   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1769   PetscErrorCode ierr;
1770   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1771   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1772   PetscInt       *cmap,*idx_p;
1773 
1774   PetscFunctionBegin;
1775   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1776   mat->getrowactive = PETSC_TRUE;
1777 
1778   if (!mat->rowvalues && (idx || v)) {
1779     /*
1780         allocate enough space to hold information from the longest row.
1781     */
1782     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1783     PetscInt   max = 1,tmp;
1784     for (i=0; i<matin->rmap->n; i++) {
1785       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1786       if (max < tmp) { max = tmp; }
1787     }
1788     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1789   }
1790 
1791   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1792   lrow = row - rstart;
1793 
1794   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1795   if (!v)   {pvA = 0; pvB = 0;}
1796   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1797   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1798   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1799   nztot = nzA + nzB;
1800 
1801   cmap  = mat->garray;
1802   if (v  || idx) {
1803     if (nztot) {
1804       /* Sort by increasing column numbers, assuming A and B already sorted */
1805       PetscInt imark = -1;
1806       if (v) {
1807         *v = v_p = mat->rowvalues;
1808         for (i=0; i<nzB; i++) {
1809           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1810           else break;
1811         }
1812         imark = i;
1813         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1814         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1815       }
1816       if (idx) {
1817         *idx = idx_p = mat->rowindices;
1818         if (imark > -1) {
1819           for (i=0; i<imark; i++) {
1820             idx_p[i] = cmap[cworkB[i]];
1821           }
1822         } else {
1823           for (i=0; i<nzB; i++) {
1824             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1825             else break;
1826           }
1827           imark = i;
1828         }
1829         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1830         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1831       }
1832     } else {
1833       if (idx) *idx = 0;
1834       if (v)   *v   = 0;
1835     }
1836   }
1837   *nz = nztot;
1838   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1839   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1845 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1846 {
1847   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1848 
1849   PetscFunctionBegin;
1850   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1851   aij->getrowactive = PETSC_FALSE;
1852   PetscFunctionReturn(0);
1853 }
1854 
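/*
   Example usage (an illustrative sketch, not part of this source file): visit the
   locally owned rows of a parallel matrix; every MatGetRow() must be paired with a
   MatRestoreRow() before the next row is requested,

      PetscInt          rstart,rend,row,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... work with ncols, cols, and vals here ...
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }

   The Mat A is assumed to come from the caller.
*/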
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatNorm_MPIAIJ"
1857 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1858 {
1859   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1860   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1861   PetscErrorCode ierr;
1862   PetscInt       i,j,cstart = mat->cmap->rstart;
1863   PetscReal      sum = 0.0;
1864   MatScalar      *v;
1865 
1866   PetscFunctionBegin;
1867   if (aij->size == 1) {
1868     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1869   } else {
1870     if (type == NORM_FROBENIUS) {
1871       v = amat->a;
1872       for (i=0; i<amat->nz; i++) {
1873 #if defined(PETSC_USE_COMPLEX)
1874         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1875 #else
1876         sum += (*v)*(*v); v++;
1877 #endif
1878       }
1879       v = bmat->a;
1880       for (i=0; i<bmat->nz; i++) {
1881 #if defined(PETSC_USE_COMPLEX)
1882         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1883 #else
1884         sum += (*v)*(*v); v++;
1885 #endif
1886       }
1887       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1888       *norm = sqrt(*norm);
1889     } else if (type == NORM_1) { /* max column norm */
1890       PetscReal *tmp,*tmp2;
1891       PetscInt  *jj,*garray = aij->garray;
1892       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1893       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1894       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1895       *norm = 0.0;
1896       v = amat->a; jj = amat->j;
1897       for (j=0; j<amat->nz; j++) {
1898         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1899       }
1900       v = bmat->a; jj = bmat->j;
1901       for (j=0; j<bmat->nz; j++) {
1902         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1903       }
1904       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1905       for (j=0; j<mat->cmap->N; j++) {
1906         if (tmp2[j] > *norm) *norm = tmp2[j];
1907       }
1908       ierr = PetscFree(tmp);CHKERRQ(ierr);
1909       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1910     } else if (type == NORM_INFINITY) { /* max row norm */
1911       PetscReal ntemp = 0.0;
1912       for (j=0; j<aij->A->rmap->n; j++) {
1913         v = amat->a + amat->i[j];
1914         sum = 0.0;
1915         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1916           sum += PetscAbsScalar(*v); v++;
1917         }
1918         v = bmat->a + bmat->i[j];
1919         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1920           sum += PetscAbsScalar(*v); v++;
1921         }
1922         if (sum > ntemp) ntemp = sum;
1923       }
1924       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1925     } else {
1926       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1927     }
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
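/*
   The norms computed above are NORM_1 = max_j sum_i |a_ij| (largest column sum),
   NORM_INFINITY = max_i sum_j |a_ij| (largest row sum), and
   NORM_FROBENIUS = sqrt(sum_ij |a_ij|^2). Example usage (an illustrative sketch):

      PetscReal nrm;
      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/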
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatTranspose_MPIAIJ"
1934 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1935 {
1936   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1937   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1938   PetscErrorCode ierr;
1939   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1940   PetscInt       cstart=A->cmap->rstart,ncol;
1941   Mat            B;
1942   MatScalar      *array;
1943 
1944   PetscFunctionBegin;
1945   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1946 
1947   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1948   ai = Aloc->i; aj = Aloc->j;
1949   bi = Bloc->i; bj = Bloc->j;
1950   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1951     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1952     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1953     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1954     for (i=0; i<ai[ma]; i++){
1955       d_nnz[aj[i]] ++;
1956       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1957     }
1958 
1959     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1960     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1961     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1962     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1963     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1964   } else {
1965     B = *matout;
1966   }
1967 
1968   /* copy over the A part */
1969   array = Aloc->a;
1970   row = A->rmap->rstart;
1971   for (i=0; i<ma; i++) {
1972     ncol = ai[i+1]-ai[i];
1973     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1974     row++; array += ncol; aj += ncol;
1975   }
1976   aj = Aloc->j;
1977   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1978 
1979   /* copy over the B part */
1980   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1981   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1982   array = Bloc->a;
1983   row = A->rmap->rstart;
1984   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1985   cols_tmp = cols;
1986   for (i=0; i<mb; i++) {
1987     ncol = bi[i+1]-bi[i];
1988     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1989     row++; array += ncol; cols_tmp += ncol;
1990   }
1991   ierr = PetscFree(cols);CHKERRQ(ierr);
1992 
1993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1995   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1996     *matout = B;
1997   } else {
1998     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
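/*
   Example usage (an illustrative sketch, not part of this source file): form the
   transpose explicitly,

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   Passing MAT_REUSE_MATRIX together with &A transposes a square matrix in place,
   as checked at the top of the routine above.
*/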
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2005 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2006 {
2007   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2008   Mat            a = aij->A,b = aij->B;
2009   PetscErrorCode ierr;
2010   PetscInt       s1,s2,s3;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2014   if (rr) {
2015     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2016     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2017     /* Overlap communication with computation. */
2018     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2019   }
2020   if (ll) {
2021     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2022     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2023     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2024   }
2025   /* scale  the diagonal block */
2026   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2027 
2028   if (rr) {
2029     /* Do a scatter end and then right scale the off-diagonal block */
2030     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2031     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2032   }
2033 
2034   PetscFunctionReturn(0);
2035 }
2036 
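/*
   Example usage (an illustrative sketch, not part of this source file): scale a matrix
   on both sides by diagonal matrices built from vectors, A <- diag(l) A diag(r),

      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);

   where l matches the row layout of A and r matches its column layout; either
   argument may be PETSC_NULL to skip scaling on that side.
*/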
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2039 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2040 {
2041   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2046   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2047   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2048   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2053 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2054 {
2055   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2056   PetscErrorCode ierr;
2057 
2058   PetscFunctionBegin;
2059   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatEqual_MPIAIJ"
2065 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2066 {
2067   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2068   Mat            a,b,c,d;
2069   PetscBool      flg;
2070   PetscErrorCode ierr;
2071 
2072   PetscFunctionBegin;
2073   a = matA->A; b = matA->B;
2074   c = matB->A; d = matB->B;
2075 
2076   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2077   if (flg) {
2078     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2079   }
2080   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatCopy_MPIAIJ"
2086 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2087 {
2088   PetscErrorCode ierr;
2089   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2090   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2091 
2092   PetscFunctionBegin;
2093   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2094   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2095     /* because of the column compression in the off-processor part of the matrix a->B,
2096        the number of columns in a->B and b->B may be different, hence we cannot call
2097        the MatCopy() directly on the two parts. If need be, we can provide a more
2098        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2099        then copying the submatrices */
2100     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2101   } else {
2102     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2103     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2104   }
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2110 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2111 {
2112   PetscErrorCode ierr;
2113 
2114   PetscFunctionBegin;
2115   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatAXPY_MPIAIJ"
2121 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2122 {
2123   PetscErrorCode ierr;
2124   PetscInt       i;
2125   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2126   PetscBLASInt   bnz,one=1;
2127   Mat_SeqAIJ     *x,*y;
2128 
2129   PetscFunctionBegin;
2130   if (str == SAME_NONZERO_PATTERN) {
2131     PetscScalar alpha = a;
2132     x = (Mat_SeqAIJ *)xx->A->data;
2133     y = (Mat_SeqAIJ *)yy->A->data;
2134     bnz = PetscBLASIntCast(x->nz);
2135     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2136     x = (Mat_SeqAIJ *)xx->B->data;
2137     y = (Mat_SeqAIJ *)yy->B->data;
2138     bnz = PetscBLASIntCast(x->nz);
2139     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2140   } else if (str == SUBSET_NONZERO_PATTERN) {
2141     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2142 
2143     x = (Mat_SeqAIJ *)xx->B->data;
2144     y = (Mat_SeqAIJ *)yy->B->data;
2145     if (y->xtoy && y->XtoY != xx->B) {
2146       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2147       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2148     }
2149     if (!y->xtoy) { /* get xtoy */
2150       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2151       y->XtoY = xx->B;
2152       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2153     }
2154     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2155   } else {
2156     Mat B;
2157     PetscInt *nnz_d,*nnz_o;
2158     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2159     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2160     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2161     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2162     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2163     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2164     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2165     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2166     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2167     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2168     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2169     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2170     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
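/*
   Example usage (an illustrative sketch, not part of this source file): Y <- Y + a*X;
   the cheap BLAS path above is taken only when the caller can guarantee matching
   nonzero patterns,

      ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   otherwise pass SUBSET_NONZERO_PATTERN or DIFFERENT_NONZERO_PATTERN.
*/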
2175 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatConjugate_MPIAIJ"
2179 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2180 {
2181 #if defined(PETSC_USE_COMPLEX)
2182   PetscErrorCode ierr;
2183   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2184 
2185   PetscFunctionBegin;
2186   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2187   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2188 #else
2189   PetscFunctionBegin;
2190 #endif
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatRealPart_MPIAIJ"
2196 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2197 {
2198   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2203   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2209 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2210 {
2211   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2216   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 #ifdef PETSC_HAVE_PBGL
2221 
2222 #include <boost/parallel/mpi/bsp_process_group.hpp>
2223 #include <boost/graph/distributed/ilu_default_graph.hpp>
2224 #include <boost/graph/distributed/ilu_0_block.hpp>
2225 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2226 #include <boost/graph/distributed/petsc/interface.hpp>
2227 #include <boost/multi_array.hpp>
2228 #include <boost/parallel/distributed_property_map.hpp>
2229 
2230 #undef __FUNCT__
2231 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2232 /*
2233   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2234 */
2235 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2236 {
2237   namespace petsc = boost::distributed::petsc;
2238 
2239   namespace graph_dist = boost::graph::distributed;
2240   using boost::graph::distributed::ilu_default::process_group_type;
2241   using boost::graph::ilu_permuted;
2242 
2243   PetscBool       row_identity, col_identity;
2244   PetscContainer  c;
2245   PetscInt        m, n, M, N;
2246   PetscErrorCode  ierr;
2247 
2248   PetscFunctionBegin;
2249   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2250   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2251   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2252   if (!row_identity || !col_identity) {
2253     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2254   }
2255 
2256   process_group_type pg;
2257   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2258   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2259   lgraph_type&   level_graph = *lgraph_p;
2260   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2261 
2262   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2263   ilu_permuted(level_graph);
2264 
2265   /* put together the new matrix */
2266   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2267   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2268   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2269   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2270   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2271   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2272   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273 
2274   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2275   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2276   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2277   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2283 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2284 {
2285   PetscFunctionBegin;
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #undef __FUNCT__
2290 #define __FUNCT__ "MatSolve_MPIAIJ"
2291 /*
2292   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2293 */
2294 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2295 {
2296   namespace graph_dist = boost::graph::distributed;
2297 
2298   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2299   lgraph_type*   lgraph_p;
2300   PetscContainer c;
2301   PetscErrorCode ierr;
2302 
2303   PetscFunctionBegin;
2304   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2305   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2306   ierr = VecCopy(b, x);CHKERRQ(ierr);
2307 
2308   PetscScalar* array_x;
2309   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2310   PetscInt sx;
2311   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2312 
2313   PetscScalar* array_b;
2314   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2315   PetscInt sb;
2316   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2317 
2318   lgraph_type&   level_graph = *lgraph_p;
2319   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2320 
2321   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2322   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2323                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2324 
2325   typedef boost::iterator_property_map<array_ref_type::iterator,
2326                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2327   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2328                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2329 
2330   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2331 
2332   PetscFunctionReturn(0);
2333 }
2334 #endif
2335 
2336 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2337   PetscInt       nzlocal,nsends,nrecvs;
2338   PetscMPIInt    *send_rank,*recv_rank;
2339   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2340   PetscScalar    *sbuf_a,**rbuf_a;
2341   PetscErrorCode (*Destroy)(Mat);
2342 } Mat_Redundant;
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2346 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2347 {
2348   PetscErrorCode       ierr;
2349   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2350   PetscInt             i;
2351 
2352   PetscFunctionBegin;
2353   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2354   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2355   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2356   for (i=0; i<redund->nrecvs; i++){
2357     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2358     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2359   }
2360   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2361   ierr = PetscFree(redund);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatDestroy_MatRedundant"
2367 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2368 {
2369   PetscErrorCode  ierr;
2370   PetscContainer  container;
2371   Mat_Redundant   *redund=PETSC_NULL;
2372 
2373   PetscFunctionBegin;
2374   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2375   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2376   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2377   A->ops->destroy = redund->Destroy;
2378   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2379   if (A->ops->destroy) {
2380     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2387 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2388 {
2389   PetscMPIInt    rank,size;
2390   MPI_Comm       comm=((PetscObject)mat)->comm;
2391   PetscErrorCode ierr;
2392   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2393   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2394   PetscInt       *rowrange=mat->rmap->range;
2395   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2396   Mat            A=aij->A,B=aij->B,C=*matredundant;
2397   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2398   PetscScalar    *sbuf_a;
2399   PetscInt       nzlocal=a->nz+b->nz;
2400   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2401   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2402   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2403   MatScalar      *aworkA,*aworkB;
2404   PetscScalar    *vals;
2405   PetscMPIInt    tag1,tag2,tag3,imdex;
2406   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2407                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2408   MPI_Status     recv_status,*send_status;
2409   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2410   PetscInt       **rbuf_j=PETSC_NULL;
2411   PetscScalar    **rbuf_a=PETSC_NULL;
2412   Mat_Redundant  *redund=PETSC_NULL;
2413   PetscContainer container;
2414 
2415   PetscFunctionBegin;
2416   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2417   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2418 
2419   if (reuse == MAT_REUSE_MATRIX) {
2420     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2421     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2422     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2423     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2424     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2425     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2426     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2427     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2428 
2429     nsends    = redund->nsends;
2430     nrecvs    = redund->nrecvs;
2431     send_rank = redund->send_rank;
2432     recv_rank = redund->recv_rank;
2433     sbuf_nz   = redund->sbuf_nz;
2434     rbuf_nz   = redund->rbuf_nz;
2435     sbuf_j    = redund->sbuf_j;
2436     sbuf_a    = redund->sbuf_a;
2437     rbuf_j    = redund->rbuf_j;
2438     rbuf_a    = redund->rbuf_a;
2439   }
2440 
2441   if (reuse == MAT_INITIAL_MATRIX){
2442     PetscMPIInt  subrank,subsize;
2443     PetscInt     nleftover,np_subcomm;
2444     /* get the destination processors' id send_rank, nsends and nrecvs */
2445     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2446     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2447     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2448     np_subcomm = size/nsubcomm;
2449     nleftover  = size - nsubcomm*np_subcomm;
2450     nsends = 0; nrecvs = 0;
2451     for (i=0; i<size; i++){ /* i=rank*/
2452       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2453         send_rank[nsends] = i; nsends++;
2454         recv_rank[nrecvs++] = i;
2455       }
2456     }
2457     if (rank >= size - nleftover){/* this proc is a leftover processor */
2458       i = size-nleftover-1;
2459       j = 0;
2460       while (j < nsubcomm - nleftover){
2461         send_rank[nsends++] = i;
2462         i--; j++;
2463       }
2464     }
2465 
2466     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2467       for (i=0; i<nleftover; i++){
2468         recv_rank[nrecvs++] = size-nleftover+i;
2469       }
2470     }
2471 
2472     /* allocate sbuf_j, sbuf_a */
2473     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2474     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2475     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2476   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2477 
2478   /* copy mat's local entries into the buffers */
2479   if (reuse == MAT_INITIAL_MATRIX){
2480     rownz_max = 0;
2481     rptr = sbuf_j;
2482     cols = sbuf_j + rend-rstart + 1;
2483     vals = sbuf_a;
2484     rptr[0] = 0;
2485     for (i=0; i<rend-rstart; i++){
2486       row = i + rstart;
2487       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2488       ncols  = nzA + nzB;
2489       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2490       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2491       /* load the column indices for this row into cols */
2492       lwrite = 0;
2493       for (l=0; l<nzB; l++) {
2494         if ((ctmp = bmap[cworkB[l]]) < cstart){
2495           vals[lwrite]   = aworkB[l];
2496           cols[lwrite++] = ctmp;
2497         }
2498       }
2499       for (l=0; l<nzA; l++){
2500         vals[lwrite]   = aworkA[l];
2501         cols[lwrite++] = cstart + cworkA[l];
2502       }
2503       for (l=0; l<nzB; l++) {
2504         if ((ctmp = bmap[cworkB[l]]) >= cend){
2505           vals[lwrite]   = aworkB[l];
2506           cols[lwrite++] = ctmp;
2507         }
2508       }
2509       vals += ncols;
2510       cols += ncols;
2511       rptr[i+1] = rptr[i] + ncols;
2512       if (rownz_max < ncols) rownz_max = ncols;
2513     }
2514     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2515   } else { /* only copy matrix values into sbuf_a */
2516     rptr = sbuf_j;
2517     vals = sbuf_a;
2518     rptr[0] = 0;
2519     for (i=0; i<rend-rstart; i++){
2520       row = i + rstart;
2521       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2522       ncols  = nzA + nzB;
2523       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2524       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2525       lwrite = 0;
2526       for (l=0; l<nzB; l++) {
2527         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2528       }
2529       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2530       for (l=0; l<nzB; l++) {
2531         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2532       }
2533       vals += ncols;
2534       rptr[i+1] = rptr[i] + ncols;
2535     }
2536   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2537 
2538   /* send nzlocal to others, and recv other's nzlocal */
2539   /*--------------------------------------------------*/
2540   if (reuse == MAT_INITIAL_MATRIX){
2541     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2542     s_waits2 = s_waits3 + nsends;
2543     s_waits1 = s_waits2 + nsends;
2544     r_waits1 = s_waits1 + nsends;
2545     r_waits2 = r_waits1 + nrecvs;
2546     r_waits3 = r_waits2 + nrecvs;
2547   } else {
2548     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2549     r_waits3 = s_waits3 + nsends;
2550   }
2551 
2552   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2553   if (reuse == MAT_INITIAL_MATRIX){
2554     /* get new tags to keep the communication clean */
2555     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2556     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2557     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2558 
2559     /* post receives of other's nzlocal */
2560     for (i=0; i<nrecvs; i++){
2561       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2562     }
2563     /* send nzlocal to others */
2564     for (i=0; i<nsends; i++){
2565       sbuf_nz[i] = nzlocal;
2566       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2567     }
2568     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2569     count = nrecvs;
2570     while (count) {
2571       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2572       recv_rank[imdex] = recv_status.MPI_SOURCE;
2573       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2574       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2575 
2576       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2577       rbuf_nz[imdex] += i + 2;
2578       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2579       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2580       count--;
2581     }
2582     /* wait on sends of nzlocal */
2583     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2584     /* send mat->i,j to others, and recv from other's */
2585     /*------------------------------------------------*/
2586     for (i=0; i<nsends; i++){
2587       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2588       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2589     }
2590     /* wait on receives of mat->i,j */
2591     /*------------------------------*/
2592     count = nrecvs;
2593     while (count) {
2594       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2595       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2596       count--;
2597     }
2598     /* wait on sends of mat->i,j */
2599     /*---------------------------*/
2600     if (nsends) {
2601       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2602     }
2603   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2604 
2605   /* post receives, send and receive mat->a */
2606   /*----------------------------------------*/
2607   for (imdex=0; imdex<nrecvs; imdex++) {
2608     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2609   }
2610   for (i=0; i<nsends; i++){
2611     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2612   }
2613   count = nrecvs;
2614   while (count) {
2615     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2616     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2617     count--;
2618   }
2619   if (nsends) {
2620     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2621   }
2622 
2623   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2624 
2625   /* create redundant matrix */
2626   /*-------------------------*/
2627   if (reuse == MAT_INITIAL_MATRIX){
2628     /* compute rownz_max for preallocation */
2629     for (imdex=0; imdex<nrecvs; imdex++){
2630       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2631       rptr = rbuf_j[imdex];
2632       for (i=0; i<j; i++){
2633         ncols = rptr[i+1] - rptr[i];
2634         if (rownz_max < ncols) rownz_max = ncols;
2635       }
2636     }
2637 
2638     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2639     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2640     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2641     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2642     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2643   } else {
2644     C = *matredundant;
2645   }
2646 
2647   /* insert local matrix entries */
2648   rptr = sbuf_j;
2649   cols = sbuf_j + rend-rstart + 1;
2650   vals = sbuf_a;
2651   for (i=0; i<rend-rstart; i++){
2652     row   = i + rstart;
2653     ncols = rptr[i+1] - rptr[i];
2654     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2655     vals += ncols;
2656     cols += ncols;
2657   }
2658   /* insert received matrix entries */
2659   for (imdex=0; imdex<nrecvs; imdex++){
2660     rstart = rowrange[recv_rank[imdex]];
2661     rend   = rowrange[recv_rank[imdex]+1];
2662     rptr = rbuf_j[imdex];
2663     cols = rbuf_j[imdex] + rend-rstart + 1;
2664     vals = rbuf_a[imdex];
2665     for (i=0; i<rend-rstart; i++){
2666       row   = i + rstart;
2667       ncols = rptr[i+1] - rptr[i];
2668       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2669       vals += ncols;
2670       cols += ncols;
2671     }
2672   }
2673   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2674   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2675   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2676   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2677   if (reuse == MAT_INITIAL_MATRIX) {
2678     PetscContainer container;
2679     *matredundant = C;
2680     /* create a supporting struct and attach it to C for reuse */
2681     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2682     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2683     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2684     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2685     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2686     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2687 
2688     redund->nzlocal = nzlocal;
2689     redund->nsends  = nsends;
2690     redund->nrecvs  = nrecvs;
2691     redund->send_rank = send_rank;
2692     redund->recv_rank = recv_rank;
2693     redund->sbuf_nz = sbuf_nz;
2694     redund->rbuf_nz = rbuf_nz;
2695     redund->sbuf_j  = sbuf_j;
2696     redund->sbuf_a  = sbuf_a;
2697     redund->rbuf_j  = rbuf_j;
2698     redund->rbuf_a  = rbuf_a;
2699 
2700     redund->Destroy = C->ops->destroy;
2701     C->ops->destroy = MatDestroy_MatRedundant;
2702   }
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2708 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2709 {
2710   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2711   PetscErrorCode ierr;
2712   PetscInt       i,*idxb = 0;
2713   PetscScalar    *va,*vb;
2714   Vec            vtmp;
2715 
2716   PetscFunctionBegin;
2717   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2718   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2719   if (idx) {
2720     for (i=0; i<A->rmap->n; i++) {
2721       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2722     }
2723   }
2724 
2725   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2726   if (idx) {
2727     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2728   }
2729   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2730   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2731 
2732   for (i=0; i<A->rmap->n; i++){
2733     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2734       va[i] = vb[i];
2735       if (idx) idx[i] = a->garray[idxb[i]];
2736     }
2737   }
2738 
2739   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2740   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2741   ierr = PetscFree(idxb);CHKERRQ(ierr);
2742   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2743   PetscFunctionReturn(0);
2744 }
2745 
2746 #undef __FUNCT__
2747 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2748 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2749 {
2750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2751   PetscErrorCode ierr;
2752   PetscInt       i,*idxb = 0;
2753   PetscScalar    *va,*vb;
2754   Vec            vtmp;
2755 
2756   PetscFunctionBegin;
2757   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2758   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2759   if (idx) {
2760     for (i=0; i<A->rmap->n; i++) {
2761       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2762     }
2763   }
2764 
2765   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2766   if (idx) {
2767     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2768   }
2769   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2770   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2771 
2772   for (i=0; i<A->rmap->n; i++){
2773     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2774       va[i] = vb[i];
2775       if (idx) idx[i] = a->garray[idxb[i]];
2776     }
2777   }
2778 
2779   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2780   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2781   ierr = PetscFree(idxb);CHKERRQ(ierr);
2782   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 #undef __FUNCT__
2787 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2788 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2789 {
2790   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2791   PetscInt       n      = A->rmap->n;
2792   PetscInt       cstart = A->cmap->rstart;
2793   PetscInt      *cmap   = mat->garray;
2794   PetscInt      *diagIdx, *offdiagIdx;
2795   Vec            diagV, offdiagV;
2796   PetscScalar   *a, *diagA, *offdiagA;
2797   PetscInt       r;
2798   PetscErrorCode ierr;
2799 
2800   PetscFunctionBegin;
2801   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2802   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2803   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2804   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2805   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2806   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2807   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2808   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2809   for(r = 0; r < n; ++r) {
2810     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2811       a[r]   = diagA[r];
2812       idx[r] = cstart + diagIdx[r];
2813     } else {
2814       a[r]   = offdiagA[r];
2815       idx[r] = cmap[offdiagIdx[r]];
2816     }
2817   }
2818   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2819   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2820   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2821   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2822   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2823   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2824   PetscFunctionReturn(0);
2825 }
2826 
2827 #undef __FUNCT__
2828 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2829 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2830 {
2831   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2832   PetscInt       n      = A->rmap->n;
2833   PetscInt       cstart = A->cmap->rstart;
2834   PetscInt      *cmap   = mat->garray;
2835   PetscInt      *diagIdx, *offdiagIdx;
2836   Vec            diagV, offdiagV;
2837   PetscScalar   *a, *diagA, *offdiagA;
2838   PetscInt       r;
2839   PetscErrorCode ierr;
2840 
2841   PetscFunctionBegin;
2842   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2843   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2844   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2845   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2846   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2847   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2848   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2849   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2850   for(r = 0; r < n; ++r) {
2851     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2852       a[r]   = diagA[r];
2853       idx[r] = cstart + diagIdx[r];
2854     } else {
2855       a[r]   = offdiagA[r];
2856       idx[r] = cmap[offdiagIdx[r]];
2857     }
2858   }
2859   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2860   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2861   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2862   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2863   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2864   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2870 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2871 {
2872   PetscErrorCode ierr;
2873   Mat            *dummy;
2874 
2875   PetscFunctionBegin;
2876   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2877   *newmat = *dummy;
2878   ierr = PetscFree(dummy);CHKERRQ(ierr);
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2883 /* -------------------------------------------------------------------*/
2884 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2885        MatGetRow_MPIAIJ,
2886        MatRestoreRow_MPIAIJ,
2887        MatMult_MPIAIJ,
2888 /* 4*/ MatMultAdd_MPIAIJ,
2889        MatMultTranspose_MPIAIJ,
2890        MatMultTransposeAdd_MPIAIJ,
2891 #ifdef PETSC_HAVE_PBGL
2892        MatSolve_MPIAIJ,
2893 #else
2894        0,
2895 #endif
2896        0,
2897        0,
2898 /*10*/ 0,
2899        0,
2900        0,
2901        MatSOR_MPIAIJ,
2902        MatTranspose_MPIAIJ,
2903 /*15*/ MatGetInfo_MPIAIJ,
2904        MatEqual_MPIAIJ,
2905        MatGetDiagonal_MPIAIJ,
2906        MatDiagonalScale_MPIAIJ,
2907        MatNorm_MPIAIJ,
2908 /*20*/ MatAssemblyBegin_MPIAIJ,
2909        MatAssemblyEnd_MPIAIJ,
2910        MatSetOption_MPIAIJ,
2911        MatZeroEntries_MPIAIJ,
2912 /*24*/ MatZeroRows_MPIAIJ,
2913        0,
2914 #ifdef PETSC_HAVE_PBGL
2915        0,
2916 #else
2917        0,
2918 #endif
2919        0,
2920        0,
2921 /*29*/ MatSetUpPreallocation_MPIAIJ,
2922 #ifdef PETSC_HAVE_PBGL
2923        0,
2924 #else
2925        0,
2926 #endif
2927        0,
2928        0,
2929        0,
2930 /*34*/ MatDuplicate_MPIAIJ,
2931        0,
2932        0,
2933        0,
2934        0,
2935 /*39*/ MatAXPY_MPIAIJ,
2936        MatGetSubMatrices_MPIAIJ,
2937        MatIncreaseOverlap_MPIAIJ,
2938        MatGetValues_MPIAIJ,
2939        MatCopy_MPIAIJ,
2940 /*44*/ MatGetRowMax_MPIAIJ,
2941        MatScale_MPIAIJ,
2942        0,
2943        0,
2944        MatZeroRowsColumns_MPIAIJ,
2945 /*49*/ MatSetBlockSize_MPIAIJ,
2946        0,
2947        0,
2948        0,
2949        0,
2950 /*54*/ MatFDColoringCreate_MPIAIJ,
2951        0,
2952        MatSetUnfactored_MPIAIJ,
2953        MatPermute_MPIAIJ,
2954        0,
2955 /*59*/ MatGetSubMatrix_MPIAIJ,
2956        MatDestroy_MPIAIJ,
2957        MatView_MPIAIJ,
2958        0,
2959        0,
2960 /*64*/ 0,
2961        0,
2962        0,
2963        0,
2964        0,
2965 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2966        MatGetRowMinAbs_MPIAIJ,
2967        0,
2968        MatSetColoring_MPIAIJ,
2969 #if defined(PETSC_HAVE_ADIC)
2970        MatSetValuesAdic_MPIAIJ,
2971 #else
2972        0,
2973 #endif
2974        MatSetValuesAdifor_MPIAIJ,
2975 /*75*/ MatFDColoringApply_AIJ,
2976        0,
2977        0,
2978        0,
2979        0,
2980 /*80*/ 0,
2981        0,
2982        0,
2983 /*83*/ MatLoad_MPIAIJ,
2984        0,
2985        0,
2986        0,
2987        0,
2988        0,
2989 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2990        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2991        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2992        MatPtAP_Basic,
2993        MatPtAPSymbolic_MPIAIJ,
2994 /*94*/ MatPtAPNumeric_MPIAIJ,
2995        0,
2996        0,
2997        0,
2998        0,
2999 /*99*/ 0,
3000        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3001        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3002        MatConjugate_MPIAIJ,
3003        0,
3004 /*104*/MatSetValuesRow_MPIAIJ,
3005        MatRealPart_MPIAIJ,
3006        MatImaginaryPart_MPIAIJ,
3007        0,
3008        0,
3009 /*109*/0,
3010        MatGetRedundantMatrix_MPIAIJ,
3011        MatGetRowMin_MPIAIJ,
3012        0,
3013        0,
3014 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3015        0,
3016        0,
3017        0,
3018        0,
3019 /*119*/0,
3020        0,
3021        0,
3022        0,
3023        MatGetMultiProcBlock_MPIAIJ,
3024 /*124*/MatFindNonZeroRows_MPIAIJ,
3025        0,
3026        0,
3027        0,
3028        MatGetSubMatricesParallel_MPIAIJ
3029 };
3030 
3031 /* ----------------------------------------------------------------------------------------*/
3032 
3033 EXTERN_C_BEGIN
3034 #undef __FUNCT__
3035 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3036 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3037 {
3038   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3039   PetscErrorCode ierr;
3040 
3041   PetscFunctionBegin;
3042   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3043   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3044   PetscFunctionReturn(0);
3045 }
3046 EXTERN_C_END
3047 
3048 EXTERN_C_BEGIN
3049 #undef __FUNCT__
3050 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3051 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3052 {
3053   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3054   PetscErrorCode ierr;
3055 
3056   PetscFunctionBegin;
3057   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3058   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3059   PetscFunctionReturn(0);
3060 }
3061 EXTERN_C_END
3062 
3063 EXTERN_C_BEGIN
3064 #undef __FUNCT__
3065 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3066 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3067 {
3068   Mat_MPIAIJ     *b;
3069   PetscErrorCode ierr;
3070   PetscInt       i;
3071 
3072   PetscFunctionBegin;
3073   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3074   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3075   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3076   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3077 
3078   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3079   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3080   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3081   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3082   if (d_nnz) {
3083     for (i=0; i<B->rmap->n; i++) {
3084       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3085     }
3086   }
3087   if (o_nnz) {
3088     for (i=0; i<B->rmap->n; i++) {
3089       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3090     }
3091   }
3092   b = (Mat_MPIAIJ*)B->data;
3093 
3094   if (!B->preallocated) {
3095     /* Explicitly create 2 MATSEQAIJ matrices. */
3096     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3097     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3098     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3099     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3100     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3101     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3102     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3103     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3104   }
3105 
3106   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3107   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3108   B->preallocated = PETSC_TRUE;
3109   PetscFunctionReturn(0);
3110 }
3111 EXTERN_C_END
3112 
3113 #undef __FUNCT__
3114 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3115 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3116 {
3117   Mat            mat;
3118   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3119   PetscErrorCode ierr;
3120 
3121   PetscFunctionBegin;
3122   *newmat       = 0;
3123   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3124   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3125   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3126   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3127   a    = (Mat_MPIAIJ*)mat->data;
3128 
3129   mat->factortype    = matin->factortype;
3130   mat->rmap->bs      = matin->rmap->bs;
3131   mat->assembled    = PETSC_TRUE;
3132   mat->insertmode   = NOT_SET_VALUES;
3133   mat->preallocated = PETSC_TRUE;
3134 
3135   a->size           = oldmat->size;
3136   a->rank           = oldmat->rank;
3137   a->donotstash     = oldmat->donotstash;
3138   a->roworiented    = oldmat->roworiented;
3139   a->rowindices     = 0;
3140   a->rowvalues      = 0;
3141   a->getrowactive   = PETSC_FALSE;
3142 
3143   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3144   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3145 
3146   if (oldmat->colmap) {
3147 #if defined (PETSC_USE_CTABLE)
3148     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3149 #else
3150     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3151     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3152     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3153 #endif
3154   } else a->colmap = 0;
3155   if (oldmat->garray) {
3156     PetscInt len;
3157     len  = oldmat->B->cmap->n;
3158     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3159     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3160     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3161   } else a->garray = 0;
3162 
3163   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3164   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3165   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3166   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3167   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3168   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3169   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3170   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3171   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3172   *newmat = mat;
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 /*
3177     Allows sending/receiving messages larger than 2 gigabytes in a single call
3178 */
3179 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3180 {
3181   int             ierr;
3182   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3183   PetscInt        i,numchunks;
3184   PetscMPIInt     icnt;
3185 
3186   numchunks = cnt/CHUNKSIZE + 1;
3187   for (i=0; i<numchunks; i++) {
3188     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3189     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
3190     if (type == MPIU_INT) {
3191       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3192     } else if (type == MPIU_SCALAR) {
3193       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3194     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3195   }
3196   return 0;
3197 }
3198 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3199 {
3200   int             ierr;
3201   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3202   MPI_Status      status;
3203   PetscInt        i,numchunks;
3204   PetscMPIInt     icnt;
3205 
3206   numchunks = cnt/CHUNKSIZE + 1;
3207   for (i=0; i<numchunks; i++) {
3208     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3209     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
3210     if (type == MPIU_INT) {
3211       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3212     } else if (type == MPIU_SCALAR) {
3213       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3214     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3215   }
3216   return 0;
3217 }
3218 
3219 #undef __FUNCT__
3220 #define __FUNCT__ "MatLoad_MPIAIJ"
3221 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3222 {
3223   PetscScalar    *vals,*svals;
3224   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3225   PetscErrorCode ierr;
3226   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3227   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3228   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3229   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3230   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3231   int            fd;
3232 
3233   PetscFunctionBegin;
3234   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3235   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3236   if (!rank) {
3237     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3238     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3239     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3240   }
3241 
3242   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3243 
3244   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3245   M = header[1]; N = header[2];
3246   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3247   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3248   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3249 
3250   /* If global sizes are set, check if they are consistent with that given in the file */
3251   if (sizesset) {
3252     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3253   }
3254   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3255   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3256 
3257   /* determine ownership of all rows */
3258   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3259   else m = newMat->rmap->n; /* Set by user */
3260 
3261   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3262   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3263 
3264   /* First process needs enough room for process with most rows */
3265   if (!rank) {
3266     mmax       = rowners[1];
3267     for (i=2; i<=size; i++) {
3268       mmax = PetscMax(mmax,rowners[i]);
3269     }
3270   } else mmax = m;
3271 
3272   rowners[0] = 0;
3273   for (i=2; i<=size; i++) {
3274     rowners[i] += rowners[i-1];
3275   }
3276   rstart = rowners[rank];
3277   rend   = rowners[rank+1];
3278 
3279   /* distribute row lengths to all processors */
3280   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3281   if (!rank) {
3282     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3283     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3284     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3285     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3286     for (j=0; j<m; j++) {
3287       procsnz[0] += ourlens[j];
3288     }
3289     for (i=1; i<size; i++) {
3290       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3291       /* calculate the number of nonzeros on each processor */
3292       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3293         procsnz[i] += rowlengths[j];
3294       }
3295       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3296     }
3297     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3298   } else {
3299     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3300   }
3301 
3302   if (!rank) {
3303     /* determine max buffer needed and allocate it */
3304     maxnz = 0;
3305     for (i=0; i<size; i++) {
3306       maxnz = PetscMax(maxnz,procsnz[i]);
3307     }
3308     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3309 
3310     /* read in my part of the matrix column indices  */
3311     nz   = procsnz[0];
3312     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3313     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3314 
3315     /* read in everyone else's and ship off */
3316     for (i=1; i<size; i++) {
3317       nz     = procsnz[i];
3318       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3319       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3320     }
3321     ierr = PetscFree(cols);CHKERRQ(ierr);
3322   } else {
3323     /* determine buffer space needed for message */
3324     nz = 0;
3325     for (i=0; i<m; i++) {
3326       nz += ourlens[i];
3327     }
3328     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3329 
3330     /* receive message of column indices*/
3331     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3332   }
3333 
3334   /* determine column ownership if matrix is not square */
3335   if (N != M) {
3336     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3337     else n = newMat->cmap->n;
3338     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3339     cstart = cend - n;
3340   } else {
3341     cstart = rstart;
3342     cend   = rend;
3343     n      = cend - cstart;
3344   }
3345 
3346   /* loop over local rows, determining number of off diagonal entries */
3347   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3348   jj = 0;
3349   for (i=0; i<m; i++) {
3350     for (j=0; j<ourlens[i]; j++) {
3351       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3352       jj++;
3353     }
3354   }
3355 
3356   for (i=0; i<m; i++) {
3357     ourlens[i] -= offlens[i];
3358   }
3359   if (!sizesset) {
3360     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3361   }
3362   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3363 
3364   for (i=0; i<m; i++) {
3365     ourlens[i] += offlens[i];
3366   }
3367 
3368   if (!rank) {
3369     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3370 
3371     /* read in my part of the matrix numerical values  */
3372     nz   = procsnz[0];
3373     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3374 
3375     /* insert into matrix */
3376     jj      = rstart;
3377     smycols = mycols;
3378     svals   = vals;
3379     for (i=0; i<m; i++) {
3380       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3381       smycols += ourlens[i];
3382       svals   += ourlens[i];
3383       jj++;
3384     }
3385 
3386     /* read in other processors and ship out */
3387     for (i=1; i<size; i++) {
3388       nz     = procsnz[i];
3389       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3390       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3391     }
3392     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3393   } else {
3394     /* receive numeric values */
3395     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3396 
3397     /* receive message of values*/
3398     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3399 
3400     /* insert into matrix */
3401     jj      = rstart;
3402     smycols = mycols;
3403     svals   = vals;
3404     for (i=0; i<m; i++) {
3405       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3406       smycols += ourlens[i];
3407       svals   += ourlens[i];
3408       jj++;
3409     }
3410   }
3411   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3412   ierr = PetscFree(vals);CHKERRQ(ierr);
3413   ierr = PetscFree(mycols);CHKERRQ(ierr);
3414   ierr = PetscFree(rowners);CHKERRQ(ierr);
3415 
3416   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3417   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3418   PetscFunctionReturn(0);
3419 }
3420 
3421 #undef __FUNCT__
3422 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3423 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3424 {
3425   PetscErrorCode ierr;
3426   IS             iscol_local;
3427   PetscInt       csize;
3428 
3429   PetscFunctionBegin;
3430   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3431   if (call == MAT_REUSE_MATRIX) {
3432     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3433     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3434   } else {
3435     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3436   }
3437   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3438   if (call == MAT_INITIAL_MATRIX) {
3439     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3440     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3441   }
3442   PetscFunctionReturn(0);
3443 }
3444 
3445 #undef __FUNCT__
3446 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3447 /*
3448     Not great since it makes two copies of the submatrix: first a SeqAIJ
3449   locally, and then the end result by concatenating the local matrices.
3450   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3451 
3452   Note: This requires a sequential iscol with all indices.
3453 */
3454 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3455 {
3456   PetscErrorCode ierr;
3457   PetscMPIInt    rank,size;
3458   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3459   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3460   Mat            *local,M,Mreuse;
3461   MatScalar      *vwork,*aa;
3462   MPI_Comm       comm = ((PetscObject)mat)->comm;
3463   Mat_SeqAIJ     *aij;
3464 
3465 
3466   PetscFunctionBegin;
3467   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3468   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3469 
3470   if (call ==  MAT_REUSE_MATRIX) {
3471     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3472     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3473     local = &Mreuse;
3474     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3475   } else {
3476     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3477     Mreuse = *local;
3478     ierr   = PetscFree(local);CHKERRQ(ierr);
3479   }
3480 
3481   /*
3482       m - number of local rows
3483       n - number of columns (same on all processors)
3484       rstart - first row in new global matrix generated
3485   */
3486   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3487   if (call == MAT_INITIAL_MATRIX) {
3488     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3489     ii  = aij->i;
3490     jj  = aij->j;
3491 
3492     /*
3493         Determine the number of non-zeros in the diagonal and off-diagonal
3494         portions of the matrix in order to do correct preallocation
3495     */
3496 
3497     /* first get start and end of "diagonal" columns */
3498     if (csize == PETSC_DECIDE) {
3499       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3500       if (mglobal == n) { /* square matrix */
3501         nlocal = m;
3502       } else {
3503         nlocal = n/size + ((n % size) > rank);
3504       }
3505     } else {
3506       nlocal = csize;
3507     }
3508     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3509     rstart = rend - nlocal;
3510     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3511 
3512     /* next, compute all the lengths */
3513     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3514     olens = dlens + m;
3515     for (i=0; i<m; i++) {
3516       jend = ii[i+1] - ii[i];
3517       olen = 0;
3518       dlen = 0;
3519       for (j=0; j<jend; j++) {
3520         if (*jj < rstart || *jj >= rend) olen++;
3521         else dlen++;
3522         jj++;
3523       }
3524       olens[i] = olen;
3525       dlens[i] = dlen;
3526     }
3527     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3528     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3529     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3530     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3531     ierr = PetscFree(dlens);CHKERRQ(ierr);
3532   } else {
3533     PetscInt ml,nl;
3534 
3535     M = *newmat;
3536     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3537     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3538     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3539     /*
3540          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3541        rather than the slower MatSetValues().
3542     */
3543     M->was_assembled = PETSC_TRUE;
3544     M->assembled     = PETSC_FALSE;
3545   }
3546   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3547   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3548   ii  = aij->i;
3549   jj  = aij->j;
3550   aa  = aij->a;
3551   for (i=0; i<m; i++) {
3552     row   = rstart + i;
3553     nz    = ii[i+1] - ii[i];
3554     cwork = jj;     jj += nz;
3555     vwork = aa;     aa += nz;
3556     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3557   }
3558 
3559   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3560   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3561   *newmat = M;
3562 
3563   /* save submatrix used in processor for next request */
3564   if (call ==  MAT_INITIAL_MATRIX) {
3565     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3566     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3567   }
3568 
3569   PetscFunctionReturn(0);
3570 }
3571 
3572 EXTERN_C_BEGIN
3573 #undef __FUNCT__
3574 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3575 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3576 {
3577   PetscInt       m,cstart, cend,j,nnz,i,d;
3578   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3579   const PetscInt *JJ;
3580   PetscScalar    *values;
3581   PetscErrorCode ierr;
3582 
3583   PetscFunctionBegin;
3584   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3585 
3586   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3587   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3588   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3589   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3590   m      = B->rmap->n;
3591   cstart = B->cmap->rstart;
3592   cend   = B->cmap->rend;
3593   rstart = B->rmap->rstart;
3594 
3595   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3596 
3597 #if defined(PETSC_USE_DEBUG)
3598   for (i=0; i<m; i++) {
3599     nnz     = Ii[i+1]- Ii[i];
3600     JJ      = J + Ii[i];
3601     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3602     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3603     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3604   }
3605 #endif
3606 
3607   for (i=0; i<m; i++) {
3608     nnz     = Ii[i+1]- Ii[i];
3609     JJ      = J + Ii[i];
3610     nnz_max = PetscMax(nnz_max,nnz);
3611     d       = 0;
3612     for (j=0; j<nnz; j++) {
3613       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3614     }
3615     d_nnz[i] = d;
3616     o_nnz[i] = nnz - d;
3617   }
3618   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3619   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3620 
3621   if (v) values = (PetscScalar*)v;
3622   else {
3623     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3624     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3625   }
3626 
3627   for (i=0; i<m; i++) {
3628     ii   = i + rstart;
3629     nnz  = Ii[i+1]- Ii[i];
3630     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3631   }
3632   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3634 
3635   if (!v) {
3636     ierr = PetscFree(values);CHKERRQ(ierr);
3637   }
3638   PetscFunctionReturn(0);
3639 }
3640 EXTERN_C_END
3641 
3642 #undef __FUNCT__
3643 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3644 /*@
3645    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3646    (the default parallel PETSc format).
3647 
3648    Collective on MPI_Comm
3649 
3650    Input Parameters:
3651 +  B - the matrix
3652 .  i - the indices into j for the start of each local row (starts with zero)
3653 .  j - the column indices for each local row (starts with zero)
3654 -  v - optional values in the matrix
3655 
3656    Level: developer
3657 
3658    Notes:
3659        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3660      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3661      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3662 
3663        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3664 
3665        The format used for the sparse matrix input is equivalent to a
3666     row-major ordering, i.e. for the following matrix, the input data expected is
3667     as shown:
3668 
3669         1 0 0
3670         2 0 3     P0
3671        -------
3672         4 5 6     P1
3673 
3674      Process0 [P0]: rows_owned=[0,1]
3675         i =  {0,1,3}  [size = nrow+1  = 2+1]
3676         j =  {0,0,2}  [size = nz = 3]
3677         v =  {1,2,3}  [size = nz = 3]
3678 
3679      Process1 [P1]: rows_owned=[2]
3680         i =  {0,3}    [size = nrow+1  = 1+1]
3681         j =  {0,1,2}  [size = nz = 3]
3682         v =  {4,5,6}  [size = nz = 3]
3683 
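     A minimal calling sketch for P0 in the example above (the communicator
     variable comm and the local sizes chosen here are illustrative assumptions;
     P1 would pass its own arrays and local sizes):

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         B;
     MatCreate(comm,&B);
     MatSetSizes(B,2,2,PETSC_DETERMINE,PETSC_DETERMINE);  /* 2 local rows and columns on P0 */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
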
3684 .keywords: matrix, aij, compressed row, sparse, parallel
3685 
3686 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3687           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3688 @*/
3689 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3690 {
3691   PetscErrorCode ierr;
3692 
3693   PetscFunctionBegin;
3694   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3695   PetscFunctionReturn(0);
3696 }
3697 
3698 #undef __FUNCT__
3699 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3700 /*@C
3701    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3702    (the default parallel PETSc format).  For good matrix assembly performance
3703    the user should preallocate the matrix storage by setting the parameters
3704    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3705    performance can be increased by more than a factor of 50.
3706 
3707    Collective on MPI_Comm
3708 
3709    Input Parameters:
3710 +  A - the matrix
3711 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3712            (same value is used for all local rows)
3713 .  d_nnz - array containing the number of nonzeros in the various rows of the
3714            DIAGONAL portion of the local submatrix (possibly different for each row)
3715            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3716            The size of this array is equal to the number of local rows, i.e 'm'.
3717            You must leave room for the diagonal entry even if it is zero.
3718 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3719            submatrix (same value is used for all local rows).
3720 -  o_nnz - array containing the number of nonzeros in the various rows of the
3721            OFF-DIAGONAL portion of the local submatrix (possibly different for
3722            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3723            structure. The size of this array is equal to the number
3724            of local rows, i.e 'm'.
3725 
3726    If the *_nnz parameter is given then the *_nz parameter is ignored
3727 
3728    The AIJ format (also called the Yale sparse matrix format or
3729    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3730    storage.  The stored row and column indices begin with zero.
3731    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3732 
3733    The parallel matrix is partitioned such that the first m0 rows belong to
3734    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3735    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3736    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3737    The DIAGONAL portion of the local submatrix of a processor can be defined
3738    as the submatrix obtained by extracting the part corresponding to
3739    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3740    first row that belongs to the processor, r2 is the last row belonging to
3741    this processor, and c1-c2 is the range of indices of the local part of a
3742    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3743    common case of a square matrix, the row and column ranges are the same and
3744    the DIAGONAL part is also square. The remaining portion of the local
3745    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3746 
3747    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3748 
3749    You can call MatGetInfo() to get information on how effective the preallocation was;
3750    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3751    You can also run with the option -info and look for messages with the string
3752    malloc in them to see if additional memory allocation was needed.
3753 
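   As a rough sketch, after assembly these fields can be inspected as follows
   (A is assumed to be the assembled matrix):

.vb
     MatInfo info;
     MatGetInfo(A,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                 info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve
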
3754    Example usage:
3755 
3756    Consider the following 8x8 matrix with 34 non-zero values, that is
3757    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3758    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3759    as follows:
3760 
3761 .vb
3762             1  2  0  |  0  3  0  |  0  4
3763     Proc0   0  5  6  |  7  0  0  |  8  0
3764             9  0 10  | 11  0  0  | 12  0
3765     -------------------------------------
3766            13  0 14  | 15 16 17  |  0  0
3767     Proc1   0 18  0  | 19 20 21  |  0  0
3768             0  0  0  | 22 23  0  | 24  0
3769     -------------------------------------
3770     Proc2  25 26 27  |  0  0 28  | 29  0
3771            30  0  0  | 31 32 33  |  0 34
3772 .ve
3773 
3774    This can be represented as a collection of submatrices as:
3775 
3776 .vb
3777       A B C
3778       D E F
3779       G H I
3780 .ve
3781 
3782    Where the submatrices A,B,C are owned by proc0, D,E,F are
3783    owned by proc1, G,H,I are owned by proc2.
3784 
3785    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3786    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3787    The 'M','N' parameters are 8,8, and have the same values on all procs.
3788 
3789    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3790    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3791    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3792    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3793    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3794    matrix, and [DF] as another SeqAIJ matrix.
3795 
3796    When d_nz, o_nz parameters are specified, d_nz storage elements are
3797    allocated for every row of the local diagonal submatrix, and o_nz
3798    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3799    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3800    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3801    In this case, the values of d_nz,o_nz are:
3802 .vb
3803      proc0 : dnz = 2, o_nz = 2
3804      proc1 : dnz = 3, o_nz = 2
3805      proc2 : dnz = 1, o_nz = 4
3806 .ve
3807    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3808    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3809    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3810    34 values.
3811 
3812    When d_nnz, o_nnz parameters are specified, the storage is specified
3813    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3814    In the above case the values for d_nnz,o_nnz are:
3815 .vb
3816      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3817      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3818      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3819 .ve
3820    Here the space allocated is the sum of all the above values, i.e. 34, and
3821    hence pre-allocation is perfect.
3822 
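   A minimal calling sketch for proc1 in the example above (the communicator
   variable comm is an illustrative assumption):

.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat      A;
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
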
3823    Level: intermediate
3824 
3825 .keywords: matrix, aij, compressed row, sparse, parallel
3826 
3827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3828           MPIAIJ, MatGetInfo()
3829 @*/
3830 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3831 {
3832   PetscErrorCode ierr;
3833 
3834   PetscFunctionBegin;
3835   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3841 /*@
3842      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3843          CSR format the local rows.
3844 
3845    Collective on MPI_Comm
3846 
3847    Input Parameters:
3848 +  comm - MPI communicator
3849 .  m - number of local rows (Cannot be PETSC_DECIDE)
3850 .  n - This value should be the same as the local size used in creating the
3851        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3852        calculated if N is given) For square matrices n is almost always m.
3853 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3854 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3855 .   i - row indices
3856 .   j - column indices
3857 -   a - matrix values
3858 
3859    Output Parameter:
3860 .   mat - the matrix
3861 
3862    Level: intermediate
3863 
3864    Notes:
3865        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3866      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3867      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3868 
3869        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3870 
3871        The format used for the sparse matrix input is equivalent to a
3872     row-major ordering, i.e. for the following matrix, the input data expected is
3873     as shown:
3874 
3875         1 0 0
3876         2 0 3     P0
3877        -------
3878         4 5 6     P1
3879 
3880      Process0 [P0]: rows_owned=[0,1]
3881         i =  {0,1,3}  [size = nrow+1  = 2+1]
3882         j =  {0,0,2}  [size = nz = 3]
3883         v =  {1,2,3}  [size = nz = 3]
3884 
3885      Process1 [P1]: rows_owned=[2]
3886         i =  {0,3}    [size = nrow+1  = 1+1]
3887         j =  {0,1,2}  [size = nz = 3]
3888         v =  {4,5,6}  [size = nz = 3]
3889 
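   A minimal calling sketch for P0 in the example above (the communicator
   variable comm is an illustrative assumption; P1 would pass its own arrays
   and local sizes):

.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,2,3,3,i,j,v,&A);
.ve
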
3890 .keywords: matrix, aij, compressed row, sparse, parallel
3891 
3892 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3893           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3894 @*/
3895 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3896 {
3897   PetscErrorCode ierr;
3898 
3899   PetscFunctionBegin;
3900   if (i[0]) {
3901     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3902   }
3903   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3904   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3905   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3906   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3907   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3908   PetscFunctionReturn(0);
3909 }
3910 
3911 #undef __FUNCT__
3912 #define __FUNCT__ "MatCreateMPIAIJ"
3913 /*@C
3914    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3915    (the default parallel PETSc format).  For good matrix assembly performance
3916    the user should preallocate the matrix storage by setting the parameters
3917    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3918    performance can be increased by more than a factor of 50.
3919 
3920    Collective on MPI_Comm
3921 
3922    Input Parameters:
3923 +  comm - MPI communicator
3924 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3925            This value should be the same as the local size used in creating the
3926            y vector for the matrix-vector product y = Ax.
3927 .  n - This value should be the same as the local size used in creating the
3928        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3929        calculated if N is given) For square matrices n is almost always m.
3930 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3931 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3932 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3933            (same value is used for all local rows)
3934 .  d_nnz - array containing the number of nonzeros in the various rows of the
3935            DIAGONAL portion of the local submatrix (possibly different for each row)
3936            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3937            The size of this array is equal to the number of local rows, i.e 'm'.
3938            You must leave room for the diagonal entry even if it is zero.
3939 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3940            submatrix (same value is used for all local rows).
3941 -  o_nnz - array containing the number of nonzeros in the various rows of the
3942            OFF-DIAGONAL portion of the local submatrix (possibly different for
3943            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3944            structure. The size of this array is equal to the number
3945            of local rows, i.e 'm'.
3946 
3947    Output Parameter:
3948 .  A - the matrix
3949 
3950    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3951    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3952    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3953 
3954    Notes:
3955    If the *_nnz parameter is given then the *_nz parameter is ignored
3956 
3957    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3958    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3959    storage requirements for this matrix.
3960 
3961    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3962    processor then it must be used on all processors that share the object for
3963    that argument.
3964 
3965    The user MUST specify either the local or global matrix dimensions
3966    (possibly both).
3967 
3968    The parallel matrix is partitioned across processors such that the
3969    first m0 rows belong to process 0, the next m1 rows belong to
3970    process 1, the next m2 rows belong to process 2, etc., where
3971    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3972    values corresponding to an [m x N] submatrix.
3973 
3974    The columns are logically partitioned with the n0 columns belonging
3975    to 0th partition, the next n1 columns belonging to the next
3976    partition etc., where n0,n1,n2... are the input parameter 'n'.
3977 
3978    The DIAGONAL portion of the local submatrix on any given processor
3979    is the submatrix corresponding to the rows and columns m,n
3980    is the submatrix corresponding to the rows and columns m,n
3981    owned by the given processor, i.e. the diagonal matrix on
3982    etc. The remaining portion of the local submatrix [m x (N-n)]
3983    constitutes the OFF-DIAGONAL portion. The example below better
3984    illustrates this concept.
3985 
3986    For a square global matrix we define each processor's diagonal portion
3987    to be its local rows and the corresponding columns (a square submatrix);
3988    each processor's off-diagonal portion encompasses the remainder of the
3989    local matrix (a rectangular submatrix).
3990 
3991    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3992 
3993    When calling this routine with a single process communicator, a matrix of
3994    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3995    type of communicator, use the construction mechanism:
3996      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3997 
3998    By default, this format uses inodes (identical nodes) when possible.
3999    We search for consecutive rows with the same nonzero structure, thereby
4000    reusing matrix information to achieve increased efficiency.
4001 
4002    Options Database Keys:
4003 +  -mat_no_inode  - Do not use inodes
4004 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4005 -  -mat_aij_oneindex - Internally use indexing starting at 1
4006         rather than 0.  Note that when calling MatSetValues(),
4007         the user still MUST index entries starting at 0!
4008 
4009 
4010    Example usage:
4011 
4012    Consider the following 8x8 matrix with 34 non-zero values that is
4013    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4014    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4015    as follows:
4016 
4017 .vb
4018             1  2  0  |  0  3  0  |  0  4
4019     Proc0   0  5  6  |  7  0  0  |  8  0
4020             9  0 10  | 11  0  0  | 12  0
4021     -------------------------------------
4022            13  0 14  | 15 16 17  |  0  0
4023     Proc1   0 18  0  | 19 20 21  |  0  0
4024             0  0  0  | 22 23  0  | 24  0
4025     -------------------------------------
4026     Proc2  25 26 27  |  0  0 28  | 29  0
4027            30  0  0  | 31 32 33  |  0 34
4028 .ve
4029 
4030    This can be represented as a collection of submatrices as:
4031 
4032 .vb
4033       A B C
4034       D E F
4035       G H I
4036 .ve
4037 
4038    Where the submatrices A,B,C are owned by proc0, D,E,F are
4039    owned by proc1, G,H,I are owned by proc2.
4040 
4041    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4042    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4043    The 'M','N' parameters are 8,8, and have the same values on all procs.
4044 
4045    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4046    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4047    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4048    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4049    part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
4050    matrix, and [DF] as another SeqAIJ matrix.
4051 
4052    When d_nz, o_nz parameters are specified, d_nz storage elements are
4053    allocated for every row of the local diagonal submatrix, and o_nz
4054    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4055    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4056    local row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4057    In this case, the values of d_nz,o_nz are:
4058 .vb
4059      proc0 : d_nz = 2, o_nz = 2
4060      proc1 : d_nz = 3, o_nz = 2
4061      proc2 : d_nz = 1, o_nz = 4
4062 .ve
4063    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4064    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4065    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4066    34 values.
4067 
4068    When d_nnz, o_nnz parameters are specified, the storage is specified
4069    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4070    In the above case the values for d_nnz,o_nnz are:
4071 .vb
4072      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4073      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4074      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4075 .ve
4076    Here the space allocated is the sum of all the above values, i.e. 34, and
4077    hence the preallocation is perfect.
4078 
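   As an illustrative sketch only (assuming the 8x8 example above and 3 processes; the
   declaration of ierr and the surrounding error-handling context are elided), the call made on
   proc0 could look like:
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};  /* per-row counts for proc0, as above */
     Mat      A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   proc1 and proc2 would make the corresponding calls with their own local sizes and arrays.
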
4079    Level: intermediate
4080 
4081 .keywords: matrix, aij, compressed row, sparse, parallel
4082 
4083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4084           MPIAIJ, MatCreateMPIAIJWithArrays()
4085 @*/
4086 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4087 {
4088   PetscErrorCode ierr;
4089   PetscMPIInt    size;
4090 
4091   PetscFunctionBegin;
4092   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4093   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4094   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4095   if (size > 1) {
4096     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4097     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4098   } else {
4099     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4100     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4101   }
4102   PetscFunctionReturn(0);
4103 }
4104 
4105 #undef __FUNCT__
4106 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4107 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4108 {
4109   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4110 
4111   PetscFunctionBegin;
4112   *Ad     = a->A;
4113   *Ao     = a->B;
4114   *colmap = a->garray;
4115   PetscFunctionReturn(0);
4116 }
4117 
4118 #undef __FUNCT__
4119 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4120 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4121 {
4122   PetscErrorCode ierr;
4123   PetscInt       i;
4124   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4125 
4126   PetscFunctionBegin;
4127   if (coloring->ctype == IS_COLORING_GLOBAL) {
4128     ISColoringValue *allcolors,*colors;
4129     ISColoring      ocoloring;
4130 
4131     /* set coloring for diagonal portion */
4132     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4133 
4134     /* set coloring for off-diagonal portion */
4135     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4136     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4137     for (i=0; i<a->B->cmap->n; i++) {
4138       colors[i] = allcolors[a->garray[i]];
4139     }
4140     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4141     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4142     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4143     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4144   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4145     ISColoringValue *colors;
4146     PetscInt        *larray;
4147     ISColoring      ocoloring;
4148 
4149     /* set coloring for diagonal portion */
4150     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4151     for (i=0; i<a->A->cmap->n; i++) {
4152       larray[i] = i + A->cmap->rstart;
4153     }
4154     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4155     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4156     for (i=0; i<a->A->cmap->n; i++) {
4157       colors[i] = coloring->colors[larray[i]];
4158     }
4159     ierr = PetscFree(larray);CHKERRQ(ierr);
4160     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4161     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4162     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4163 
4164     /* set coloring for off-diagonal portion */
4165     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4166     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4167     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4168     for (i=0; i<a->B->cmap->n; i++) {
4169       colors[i] = coloring->colors[larray[i]];
4170     }
4171     ierr = PetscFree(larray);CHKERRQ(ierr);
4172     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4173     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4174     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4175   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
4176 
4177   PetscFunctionReturn(0);
4178 }
4179 
4180 #if defined(PETSC_HAVE_ADIC)
4181 #undef __FUNCT__
4182 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4183 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4184 {
4185   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4186   PetscErrorCode ierr;
4187 
4188   PetscFunctionBegin;
4189   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4190   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4191   PetscFunctionReturn(0);
4192 }
4193 #endif
4194 
4195 #undef __FUNCT__
4196 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4197 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4198 {
4199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4200   PetscErrorCode ierr;
4201 
4202   PetscFunctionBegin;
4203   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4204   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4205   PetscFunctionReturn(0);
4206 }
4207 
4208 #undef __FUNCT__
4209 #define __FUNCT__ "MatMerge"
4210 /*@
4211       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4212                  matrices from each processor
4213 
4214     Collective on MPI_Comm
4215 
4216    Input Parameters:
4217 +    comm - the communicator the parallel matrix will live on
4218 .    inmat - the input sequential matrix on each process
4219 .    n - number of local columns (or PETSC_DECIDE)
4220 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4221 
4222    Output Parameter:
4223 .    outmat - the parallel matrix generated
4224 
4225     Level: advanced
4226 
4227    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4228 
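   As a rough usage sketch (each process passes its own sequential matrix, here called 'local';
   declarations and error-handling context are elided), one might write:
.vb
     Mat merged;
     ierr = MatMerge(PETSC_COMM_WORLD,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);CHKERRQ(ierr);
.ve
   Note that the input matrix is destroyed inside this routine, so it must not be used afterwards.
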
4229 @*/
4230 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4231 {
4232   PetscErrorCode ierr;
4233   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4234   PetscInt       *indx;
4235   PetscScalar    *values;
4236 
4237   PetscFunctionBegin;
4238   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4239   if (scall == MAT_INITIAL_MATRIX){
4240     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4241     if (n == PETSC_DECIDE){
4242       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4243     }
4244     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4245     rstart -= m;
4246 
4247     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4248     for (i=0;i<m;i++) {
4249       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4250       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4251       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4252     }
4253     /* This routine will ONLY return MPIAIJ type matrix */
4254     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4255     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4256     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4257     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4258     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4259 
4260   } else if (scall == MAT_REUSE_MATRIX){
4261     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4262   } else {
4263     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4264   }
4265 
4266   for (i=0;i<m;i++) {
4267     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4268     Ii    = i + rstart;
4269     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4270     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4271   }
4272   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4273   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4274   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4275 
4276   PetscFunctionReturn(0);
4277 }
4278 
4279 #undef __FUNCT__
4280 #define __FUNCT__ "MatFileSplit"
4281 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4282 {
4283   PetscErrorCode    ierr;
4284   PetscMPIInt       rank;
4285   PetscInt          m,N,i,rstart,nnz;
4286   size_t            len;
4287   const PetscInt    *indx;
4288   PetscViewer       out;
4289   char              *name;
4290   Mat               B;
4291   const PetscScalar *values;
4292 
4293   PetscFunctionBegin;
4294   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4295   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4296   /* Should this be the type of the diagonal block of A? */
4297   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4298   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4299   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4300   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4301   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4302   for (i=0;i<m;i++) {
4303     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4304     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4305     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4306   }
4307   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4308   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4309 
4310   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4311   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4312   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4313   sprintf(name,"%s.%d",outfile,rank);
4314   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4315   ierr = PetscFree(name);
4316   ierr = MatView(B,out);CHKERRQ(ierr);
4317   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4318   ierr = MatDestroy(&B);CHKERRQ(ierr);
4319   PetscFunctionReturn(0);
4320 }
4321 
4322 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4323 #undef __FUNCT__
4324 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4325 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4326 {
4327   PetscErrorCode       ierr;
4328   Mat_Merge_SeqsToMPI  *merge;
4329   PetscContainer       container;
4330 
4331   PetscFunctionBegin;
4332   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4333   if (container) {
4334     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4335     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4336     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4337     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4338     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4339     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4340     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4341     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4342     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4343     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4344     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4345     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4346     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4347     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4348     ierr = PetscFree(merge);CHKERRQ(ierr);
4349     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4350   }
4351   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4352   PetscFunctionReturn(0);
4353 }
4354 
4355 #include <../src/mat/utils/freespace.h>
4356 #include <petscbt.h>
4357 
4358 #undef __FUNCT__
4359 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4360 /*@C
4361       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4362                  matrices from each processor
4363 
4364     Collective on MPI_Comm
4365 
4366    Input Parameters:
4367 +    comm - the communicator the parallel matrix will live on
4368 .    seqmat - the input sequential matrix on each process
4369 .    m - number of local rows (or PETSC_DECIDE)
4370 .    n - number of local columns (or PETSC_DECIDE)
4371 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4372 
4373    Output Parameter:
4374 .    mpimat - the parallel matrix generated
4375 
4376     Level: advanced
4377 
4378    Notes:
4379      The dimensions of the sequential matrix in each processor MUST be the same.
4380      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4381      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
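     A minimal usage sketch following the documented interface (declarations and error-handling
     context elided; as noted above, m and n may be PETSC_DECIDE):
.vb
       Mat mpimat;
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve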
4382 @*/
4383 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4384 {
4385   PetscErrorCode       ierr;
4386   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4387   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4388   PetscMPIInt          size,rank,taga,*len_s;
4389   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4390   PetscInt             proc,m;
4391   PetscInt             **buf_ri,**buf_rj;
4392   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4393   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4394   MPI_Request          *s_waits,*r_waits;
4395   MPI_Status           *status;
4396   MatScalar            *aa=a->a;
4397   MatScalar            **abuf_r,*ba_i;
4398   Mat_Merge_SeqsToMPI  *merge;
4399   PetscContainer       container;
4400 
4401   PetscFunctionBegin;
4402   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4403 
4404   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4405   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4406 
4407   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4408   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4409 
4410   bi     = merge->bi;
4411   bj     = merge->bj;
4412   buf_ri = merge->buf_ri;
4413   buf_rj = merge->buf_rj;
4414 
4415   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4416   owners = merge->rowmap->range;
4417   len_s  = merge->len_s;
4418 
4419   /* send and recv matrix values */
4420   /*-----------------------------*/
4421   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4422   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4423 
4424   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4425   for (proc=0,k=0; proc<size; proc++){
4426     if (!len_s[proc]) continue;
4427     i = owners[proc];
4428     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4429     k++;
4430   }
4431 
4432   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4433   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4434   ierr = PetscFree(status);CHKERRQ(ierr);
4435 
4436   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4437   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4438 
4439   /* insert mat values of mpimat */
4440   /*----------------------------*/
4441   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4442   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4443 
4444   for (k=0; k<merge->nrecv; k++){
4445     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4446     nrows = *(buf_ri_k[k]);
4447     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4448     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4449   }
4450 
4451   /* set values of ba */
4452   m = merge->rowmap->n;
4453   for (i=0; i<m; i++) {
4454     arow = owners[rank] + i;
4455     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4456     bnzi = bi[i+1] - bi[i];
4457     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4458 
4459     /* add local non-zero vals of this proc's seqmat into ba */
4460     anzi = ai[arow+1] - ai[arow];
4461     aj   = a->j + ai[arow];
4462     aa   = a->a + ai[arow];
4463     nextaj = 0;
4464     for (j=0; nextaj<anzi; j++){
4465       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4466         ba_i[j] += aa[nextaj++];
4467       }
4468     }
4469 
4470     /* add received vals into ba */
4471     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4472       /* i-th row */
4473       if (i == *nextrow[k]) {
4474         anzi = *(nextai[k]+1) - *nextai[k];
4475         aj   = buf_rj[k] + *(nextai[k]);
4476         aa   = abuf_r[k] + *(nextai[k]);
4477         nextaj = 0;
4478         for (j=0; nextaj<anzi; j++){
4479           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4480             ba_i[j] += aa[nextaj++];
4481           }
4482         }
4483         nextrow[k]++; nextai[k]++;
4484       }
4485     }
4486     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4487   }
4488   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4489   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4490 
4491   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4492   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4493   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4494   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4495   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4496   PetscFunctionReturn(0);
4497 }
4498 
4499 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4500 
4501 #undef __FUNCT__
4502 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4503 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4504 {
4505   PetscErrorCode       ierr;
4506   Mat                  B_mpi;
4507   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4508   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4509   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4510   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4511   PetscInt             len,proc,*dnz,*onz;
4512   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4513   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4514   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4515   MPI_Status           *status;
4516   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4517   PetscBT              lnkbt;
4518   Mat_Merge_SeqsToMPI  *merge;
4519   PetscContainer       container;
4520 
4521   PetscFunctionBegin;
4522   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4523 
4524   /* make sure it is a PETSc comm */
4525   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4527   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4528 
4529   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4530   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4531 
4532   /* determine row ownership */
4533   /*---------------------------------------------------------*/
4534   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4535   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4536   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4537   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4538   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4539   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4540   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4541 
4542   m      = merge->rowmap->n;
4543   M      = merge->rowmap->N;
4544   owners = merge->rowmap->range;
4545 
4546   /* determine the number of messages to send, their lengths */
4547   /*---------------------------------------------------------*/
4548   len_s  = merge->len_s;
4549 
4550   len = 0;  /* length of buf_si[] */
4551   merge->nsend = 0;
4552   for (proc=0; proc<size; proc++){
4553     len_si[proc] = 0;
4554     if (proc == rank){
4555       len_s[proc] = 0;
4556     } else {
4557       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4558       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4559     }
4560     if (len_s[proc]) {
4561       merge->nsend++;
4562       nrows = 0;
4563       for (i=owners[proc]; i<owners[proc+1]; i++){
4564         if (ai[i+1] > ai[i]) nrows++;
4565       }
4566       len_si[proc] = 2*(nrows+1);
4567       len += len_si[proc];
4568     }
4569   }
4570 
4571   /* determine the number and length of messages to receive for ij-structure */
4572   /*-------------------------------------------------------------------------*/
4573   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4574   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4575 
4576   /* post the Irecv of j-structure */
4577   /*-------------------------------*/
4578   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4579   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4580 
4581   /* post the Isend of j-structure */
4582   /*--------------------------------*/
4583   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4584 
4585   for (proc=0, k=0; proc<size; proc++){
4586     if (!len_s[proc]) continue;
4587     i = owners[proc];
4588     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4589     k++;
4590   }
4591 
4592   /* receives and sends of j-structure are complete */
4593   /*------------------------------------------------*/
4594   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4595   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4596 
4597   /* send and recv i-structure */
4598   /*---------------------------*/
4599   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4600   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4601 
4602   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4603   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4604   for (proc=0,k=0; proc<size; proc++){
4605     if (!len_s[proc]) continue;
4606     /* form outgoing message for i-structure:
4607          buf_si[0]:                 nrows to be sent
4608                [1:nrows]:           row index (global)
4609                [nrows+1:2*nrows+1]: i-structure index
4610     */
4611     /*-------------------------------------------*/
4612     nrows = len_si[proc]/2 - 1;
4613     buf_si_i    = buf_si + nrows+1;
4614     buf_si[0]   = nrows;
4615     buf_si_i[0] = 0;
4616     nrows = 0;
4617     for (i=owners[proc]; i<owners[proc+1]; i++){
4618       anzi = ai[i+1] - ai[i];
4619       if (anzi) {
4620         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4621         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4622         nrows++;
4623       }
4624     }
4625     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4626     k++;
4627     buf_si += len_si[proc];
4628   }
4629 
4630   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4631   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4632 
4633   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4634   for (i=0; i<merge->nrecv; i++){
4635     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4636   }
4637 
4638   ierr = PetscFree(len_si);CHKERRQ(ierr);
4639   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4640   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4641   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4642   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4643   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4644   ierr = PetscFree(status);CHKERRQ(ierr);
4645 
4646   /* compute a local seq matrix in each processor */
4647   /*----------------------------------------------*/
4648   /* allocate bi array and free space for accumulating nonzero column info */
4649   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4650   bi[0] = 0;
4651 
4652   /* create and initialize a linked list */
4653   nlnk = N+1;
4654   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4655 
4656   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4657   len = 0;
4658   len  = ai[owners[rank+1]] - ai[owners[rank]];
4659   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4660   current_space = free_space;
4661 
4662   /* determine symbolic info for each local row */
4663   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4664 
4665   for (k=0; k<merge->nrecv; k++){
4666     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4667     nrows = *buf_ri_k[k];
4668     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4669     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4670   }
4671 
4672   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4673   len = 0;
4674   for (i=0;i<m;i++) {
4675     bnzi   = 0;
4676     /* add local non-zero cols of this proc's seqmat into lnk */
4677     arow   = owners[rank] + i;
4678     anzi   = ai[arow+1] - ai[arow];
4679     aj     = a->j + ai[arow];
4680     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4681     bnzi += nlnk;
4682     /* add received col data into lnk */
4683     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4684       if (i == *nextrow[k]) { /* i-th row */
4685         anzi = *(nextai[k]+1) - *nextai[k];
4686         aj   = buf_rj[k] + *nextai[k];
4687         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4688         bnzi += nlnk;
4689         nextrow[k]++; nextai[k]++;
4690       }
4691     }
4692     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4693 
4694     /* if free space is not available, make more free space */
4695     if (current_space->local_remaining<bnzi) {
4696       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4697       nspacedouble++;
4698     }
4699     /* copy data into free space, then initialize lnk */
4700     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4701     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4702 
4703     current_space->array           += bnzi;
4704     current_space->local_used      += bnzi;
4705     current_space->local_remaining -= bnzi;
4706 
4707     bi[i+1] = bi[i] + bnzi;
4708   }
4709 
4710   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4711 
4712   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4713   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4714   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4715 
4716   /* create symbolic parallel matrix B_mpi */
4717   /*---------------------------------------*/
4718   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4719   if (n==PETSC_DECIDE) {
4720     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4721   } else {
4722     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4723   }
4724   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4725   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4726   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4727 
4728   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4729   B_mpi->assembled     = PETSC_FALSE;
4730   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4731   merge->bi            = bi;
4732   merge->bj            = bj;
4733   merge->buf_ri        = buf_ri;
4734   merge->buf_rj        = buf_rj;
4735   merge->coi           = PETSC_NULL;
4736   merge->coj           = PETSC_NULL;
4737   merge->owners_co     = PETSC_NULL;
4738 
4739   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4740 
4741   /* attach the supporting struct to B_mpi for reuse */
4742   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4743   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4744   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4745   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4746   *mpimat = B_mpi;
4747 
4748   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4749   PetscFunctionReturn(0);
4750 }
4751 
4752 #undef __FUNCT__
4753 #define __FUNCT__ "MatMerge_SeqsToMPI"
4754 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4755 {
4756   PetscErrorCode   ierr;
4757 
4758   PetscFunctionBegin;
4759   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4760   if (scall == MAT_INITIAL_MATRIX){
4761     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4762   }
4763   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4764   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4765   PetscFunctionReturn(0);
4766 }
4767 
4768 #undef __FUNCT__
4769 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4770 /*@
4771      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4772           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4773           with MatGetSize()
4774 
4775     Not Collective
4776 
4777    Input Parameters:
4778 +    A - the matrix
4779 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4780 
4781    Output Parameter:
4782 .    A_loc - the local sequential matrix generated
4783 
4784     Level: developer
4785 
4786 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4787 
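   A small usage sketch (assuming A is an assembled MPIAIJ matrix and that the caller destroys
   A_loc when it is no longer needed; error-handling context elided):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use the sequential matrix A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
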
4788 @*/
4789 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4790 {
4791   PetscErrorCode  ierr;
4792   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4793   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4794   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4795   MatScalar       *aa=a->a,*ba=b->a,*cam;
4796   PetscScalar     *ca;
4797   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4798   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4799   PetscBool       match;
4800 
4801   PetscFunctionBegin;
4802   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4803   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4804   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4805   if (scall == MAT_INITIAL_MATRIX){
4806     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4807     ci[0] = 0;
4808     for (i=0; i<am; i++){
4809       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4810     }
4811     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4812     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4813     k = 0;
4814     for (i=0; i<am; i++) {
4815       ncols_o = bi[i+1] - bi[i];
4816       ncols_d = ai[i+1] - ai[i];
4817       /* off-diagonal portion of A */
4818       for (jo=0; jo<ncols_o; jo++) {
4819         col = cmap[*bj];
4820         if (col >= cstart) break;
4821         cj[k]   = col; bj++;
4822         ca[k++] = *ba++;
4823       }
4824       /* diagonal portion of A */
4825       for (j=0; j<ncols_d; j++) {
4826         cj[k]   = cstart + *aj++;
4827         ca[k++] = *aa++;
4828       }
4829       /* off-diagonal portion of A */
4830       for (j=jo; j<ncols_o; j++) {
4831         cj[k]   = cmap[*bj++];
4832         ca[k++] = *ba++;
4833       }
4834     }
4835     /* put together the new matrix */
4836     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4837     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4838     /* Since these are PETSc arrays, change flags to free them as necessary. */
4839     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4840     mat->free_a  = PETSC_TRUE;
4841     mat->free_ij = PETSC_TRUE;
4842     mat->nonew   = 0;
4843   } else if (scall == MAT_REUSE_MATRIX){
4844     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4845     ci = mat->i; cj = mat->j; cam = mat->a;
4846     for (i=0; i<am; i++) {
4847       /* off-diagonal portion of A */
4848       ncols_o = bi[i+1] - bi[i];
4849       for (jo=0; jo<ncols_o; jo++) {
4850         col = cmap[*bj];
4851         if (col >= cstart) break;
4852         *cam++ = *ba++; bj++;
4853       }
4854       /* diagonal portion of A */
4855       ncols_d = ai[i+1] - ai[i];
4856       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4857       /* off-diagonal portion of A */
4858       for (j=jo; j<ncols_o; j++) {
4859         *cam++ = *ba++; bj++;
4860       }
4861     }
4862   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4863   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4864   PetscFunctionReturn(0);
4865 }
4866 
4867 #undef __FUNCT__
4868 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4869 /*@C
4870      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4871 
4872     Not Collective
4873 
4874    Input Parameters:
4875 +    A - the matrix
4876 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4877 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4878 
4879    Output Parameter:
4880 .    A_loc - the local sequential matrix generated
4881 
4882     Level: developer
4883 
4884 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4885 
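   A small usage sketch (passing PETSC_NULL for both row and col so that all local rows and the
   nonzero columns are taken; the caller is assumed to destroy A_loc when done):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
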
4886 @*/
4887 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4888 {
4889   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4890   PetscErrorCode    ierr;
4891   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4892   IS                isrowa,iscola;
4893   Mat               *aloc;
4894   PetscBool       match;
4895 
4896   PetscFunctionBegin;
4897   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4898   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4899   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4900   if (!row){
4901     start = A->rmap->rstart; end = A->rmap->rend;
4902     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4903   } else {
4904     isrowa = *row;
4905   }
4906   if (!col){
4907     start = A->cmap->rstart;
4908     cmap  = a->garray;
4909     nzA   = a->A->cmap->n;
4910     nzB   = a->B->cmap->n;
4911     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4912     ncols = 0;
4913     for (i=0; i<nzB; i++) {
4914       if (cmap[i] < start) idx[ncols++] = cmap[i];
4915       else break;
4916     }
4917     imark = i;
4918     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4919     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4920     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4921   } else {
4922     iscola = *col;
4923   }
4924   if (scall != MAT_INITIAL_MATRIX){
4925     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4926     aloc[0] = *A_loc;
4927   }
4928   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4929   *A_loc = aloc[0];
4930   ierr = PetscFree(aloc);CHKERRQ(ierr);
4931   if (!row){
4932     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4933   }
4934   if (!col){
4935     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4936   }
4937   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4938   PetscFunctionReturn(0);
4939 }
4940 
4941 #undef __FUNCT__
4942 #define __FUNCT__ "MatGetBrowsOfAcols"
4943 /*@C
4944     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4945 
4946     Collective on Mat
4947 
4948    Input Parameters:
4949 +    A,B - the matrices in mpiaij format
4950 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4951 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4952 
4953    Output Parameter:
4954 +    rowb, colb - index sets of rows and columns of B to extract
4955 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4956 -    B_seq - the sequential matrix generated
4957 
4958     Level: developer
4959 
4960 @*/
4961 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4962 {
4963   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4964   PetscErrorCode    ierr;
4965   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4966   IS                isrowb,iscolb;
4967   Mat               *bseq;
4968 
4969   PetscFunctionBegin;
4970   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4971     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4972   }
4973   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4974 
4975   if (scall == MAT_INITIAL_MATRIX){
4976     start = A->cmap->rstart;
4977     cmap  = a->garray;
4978     nzA   = a->A->cmap->n;
4979     nzB   = a->B->cmap->n;
4980     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4981     ncols = 0;
4982     for (i=0; i<nzB; i++) {  /* row < local row index */
4983       if (cmap[i] < start) idx[ncols++] = cmap[i];
4984       else break;
4985     }
4986     imark = i;
4987     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4988     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4989     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4990     *brstart = imark;
4991     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4992   } else {
4993     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4994     isrowb = *rowb; iscolb = *colb;
4995     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4996     bseq[0] = *B_seq;
4997   }
4998   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4999   *B_seq = bseq[0];
5000   ierr = PetscFree(bseq);CHKERRQ(ierr);
5001   if (!rowb){
5002     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5003   } else {
5004     *rowb = isrowb;
5005   }
5006   if (!colb){
5007     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5008   } else {
5009     *colb = iscolb;
5010   }
5011   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5012   PetscFunctionReturn(0);
5013 }
5014 
5015 #undef __FUNCT__
5016 #define __FUNCT__ "MatGetBrowsOfAoCols"
5017 /*@C
5018     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5019     of the OFF-DIAGONAL portion of local A
5020 
5021     Collective on Mat
5022 
5023    Input Parameters:
5024 +    A,B - the matrices in mpiaij format
5025 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5026 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5027 .    startsj_r - similar to startsj for receives
5028 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5029 
5030    Output Parameter:
5031 .    B_oth - the sequential matrix generated
5032 
5033     Level: developer
5034 
5035 @*/
5036 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5037 {
5038   VecScatter_MPI_General *gen_to,*gen_from;
5039   PetscErrorCode         ierr;
5040   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5041   Mat_SeqAIJ             *b_oth;
5042   VecScatter             ctx=a->Mvctx;
5043   MPI_Comm               comm=((PetscObject)ctx)->comm;
5044   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5045   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5046   PetscScalar            *rvalues,*svalues;
5047   MatScalar              *b_otha,*bufa,*bufA;
5048   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5049   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5050   MPI_Status             *sstatus,rstatus;
5051   PetscMPIInt            jj;
5052   PetscInt               *cols,sbs,rbs;
5053   PetscScalar            *vals;
5054 
5055   PetscFunctionBegin;
5056   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5057     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5058   }
5059   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5060   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5061 
5062   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5063   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5064   rvalues  = gen_from->values; /* holds the length of receiving row */
5065   svalues  = gen_to->values;   /* holds the length of sending row */
5066   nrecvs   = gen_from->n;
5067   nsends   = gen_to->n;
5068 
5069   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5070   srow     = gen_to->indices;   /* local row index to be sent */
5071   sstarts  = gen_to->starts;
5072   sprocs   = gen_to->procs;
5073   sstatus  = gen_to->sstatus;
5074   sbs      = gen_to->bs;
5075   rstarts  = gen_from->starts;
5076   rprocs   = gen_from->procs;
5077   rbs      = gen_from->bs;
5078 
5079   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5080   if (scall == MAT_INITIAL_MATRIX){
5081     /* i-array */
5082     /*---------*/
5083     /*  post receives */
5084     for (i=0; i<nrecvs; i++){
5085       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5086       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5087       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5088     }
5089 
5090     /* pack the outgoing message */
5091     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5092     sstartsj[0] = 0;  rstartsj[0] = 0;
5093     len = 0; /* total length of j or a array to be sent */
5094     k = 0;
5095     for (i=0; i<nsends; i++){
5096       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5097       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5098       for (j=0; j<nrows; j++) {
5099         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5100         for (l=0; l<sbs; l++){
5101           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5102           rowlen[j*sbs+l] = ncols;
5103           len += ncols;
5104           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5105         }
5106         k++;
5107       }
5108       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5109       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5110     }
5111     /* recvs and sends of i-array are completed */
5112     i = nrecvs;
5113     while (i--) {
5114       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5115     }
5116     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5117 
5118     /* allocate buffers for sending j and a arrays */
5119     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5120     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5121 
5122     /* create i-array of B_oth */
5123     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5124     b_othi[0] = 0;
5125     len = 0; /* total length of j or a array to be received */
5126     k = 0;
5127     for (i=0; i<nrecvs; i++){
5128       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5129       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5130       for (j=0; j<nrows; j++) {
5131         b_othi[k+1] = b_othi[k] + rowlen[j];
5132         len += rowlen[j]; k++;
5133       }
5134       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5135     }
5136 
5137     /* allocate space for j and a arrays of B_oth */
5138     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5139     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5140 
5141     /* j-array */
5142     /*---------*/
5143     /*  post receives of j-array */
5144     for (i=0; i<nrecvs; i++){
5145       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5146       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5147     }
5148 
5149     /* pack the outgoing message j-array */
5150     k = 0;
5151     for (i=0; i<nsends; i++){
5152       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5153       bufJ = bufj+sstartsj[i];
5154       for (j=0; j<nrows; j++) {
5155         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5156         for (ll=0; ll<sbs; ll++){
5157           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5158           for (l=0; l<ncols; l++){
5159             *bufJ++ = cols[l];
5160           }
5161           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5162         }
5163       }
5164       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5165     }
5166 
5167     /* recvs and sends of j-array are completed */
5168     i = nrecvs;
5169     while (i--) {
5170       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5171     }
5172     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5173   } else if (scall == MAT_REUSE_MATRIX){
5174     sstartsj = *startsj;
5175     rstartsj = *startsj_r;
5176     bufa     = *bufa_ptr;
5177     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5178     b_otha   = b_oth->a;
5179   } else {
5180     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5181   }
5182 
5183   /* a-array */
5184   /*---------*/
5185   /*  post receives of a-array */
5186   for (i=0; i<nrecvs; i++){
5187     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5188     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5189   }
5190 
5191   /* pack the outgoing message a-array */
5192   k = 0;
5193   for (i=0; i<nsends; i++){
5194     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5195     bufA = bufa+sstartsj[i];
5196     for (j=0; j<nrows; j++) {
5197       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5198       for (ll=0; ll<sbs; ll++){
5199         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5200         for (l=0; l<ncols; l++){
5201           *bufA++ = vals[l];
5202         }
5203         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5204       }
5205     }
5206     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5207   }
5208   /* recvs and sends of a-array are completed */
5209   i = nrecvs;
5210   while (i--) {
5211     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5212   }
5213   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5214   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5215 
5216   if (scall == MAT_INITIAL_MATRIX){
5217     /* put together the new matrix */
5218     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5219 
5220     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5221     /* Since these are PETSc arrays, change flags to free them as necessary. */
5222     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5223     b_oth->free_a  = PETSC_TRUE;
5224     b_oth->free_ij = PETSC_TRUE;
5225     b_oth->nonew   = 0;
5226 
5227     ierr = PetscFree(bufj);CHKERRQ(ierr);
5228     if (!startsj || !bufa_ptr){
5229       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5230       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5231     } else {
5232       *startsj   = sstartsj;
5233       *startsj_r = rstartsj;
5234       *bufa_ptr  = bufa;
5235     }
5236   }
5237   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5238   PetscFunctionReturn(0);
5239 }
5240 
5241 #undef __FUNCT__
5242 #define __FUNCT__ "MatGetCommunicationStructs"
5243 /*@C
5244   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5245 
5246   Not Collective
5247 
5248   Input Parameters:
5249 . A - The matrix in mpiaij format
5250 
5251   Output Parameter:
5252 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5253 . colmap - A map from global column index to local index into lvec
5254 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5255 
5256   Level: developer
5257 
5258 @*/
5259 #if defined (PETSC_USE_CTABLE)
5260 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5261 #else
5262 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5263 #endif
5264 {
5265   Mat_MPIAIJ *a;
5266 
5267   PetscFunctionBegin;
5268   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5269   PetscValidPointer(lvec, 2);
5270   PetscValidPointer(colmap, 3);
5271   PetscValidPointer(multScatter, 4);
5272   a = (Mat_MPIAIJ *) A->data;
5273   if (lvec) *lvec = a->lvec;
5274   if (colmap) *colmap = a->colmap;
5275   if (multScatter) *multScatter = a->Mvctx;
5276   PetscFunctionReturn(0);
5277 }
5278 
5279 EXTERN_C_BEGIN
5280 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5281 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5282 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5283 EXTERN_C_END
5284 
5285 #undef __FUNCT__
5286 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5287 /*
5288     Computes (B'*A')' since computing B*A directly is untenable
5289 
5290                n                       p                          p
5291         (              )       (              )         (                  )
5292       m (      A       )  *  n (       B      )   =   m (         C        )
5293         (              )       (              )         (                  )
5294 
5295 */
5296 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5297 {
5298   PetscErrorCode     ierr;
5299   Mat                At,Bt,Ct;
5300 
5301   PetscFunctionBegin;
5302   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5303   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5304   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5305   ierr = MatDestroy(&At);CHKERRQ(ierr);
5306   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5307   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5308   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5309   PetscFunctionReturn(0);
5310 }
5311 
5312 #undef __FUNCT__
5313 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5314 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5315 {
5316   PetscErrorCode ierr;
5317   PetscInt       m=A->rmap->n,n=B->cmap->n;
5318   Mat            Cmat;
5319 
5320   PetscFunctionBegin;
5321   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5322   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5323   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5324   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5325   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5326   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5327   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5328   *C   = Cmat;
5329   PetscFunctionReturn(0);
5330 }
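/*
   Sketch (hypothetical caller): the symbolic and numeric phases can also be invoked
   separately; this is exactly what MatMatMult_MPIDense_MPIAIJ() below does for
   MAT_INITIAL_MATRIX. The fill argument is ignored by the dense symbolic phase.

     Mat C;
     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,C);CHKERRQ(ierr);
*/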
5331 
5332 /* ----------------------------------------------------------------*/
5333 #undef __FUNCT__
5334 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5335 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5336 {
5337   PetscErrorCode ierr;
5338 
5339   PetscFunctionBegin;
5340   if (scall == MAT_INITIAL_MATRIX){
5341     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5342   }
5343   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5344   PetscFunctionReturn(0);
5345 }
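/*
   Hypothetical user-level sketch (not part of this file): with A of type MPIDENSE and
   B of type MPIAIJ, the generic MatMatMult() interface reaches the routine above via
   the "MatMatMult_mpidense_mpiaij_C" function composed on the MPIAIJ matrix in
   MatCreate_MPIAIJ() below.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     (change entries of A or B, then reuse the product)
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/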
5346 
5347 EXTERN_C_BEGIN
5348 #if defined(PETSC_HAVE_MUMPS)
5349 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5350 #endif
5351 #if defined(PETSC_HAVE_PASTIX)
5352 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5353 #endif
5354 #if defined(PETSC_HAVE_SUPERLU_DIST)
5355 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5356 #endif
5357 #if defined(PETSC_HAVE_SPOOLES)
5358 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5359 #endif
5360 EXTERN_C_END
5361 
5362 /*MC
5363    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5364 
5365    Options Database Keys:
5366 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5367 
5368   Level: beginner
5369 
5370 .seealso: MatCreateMPIAIJ()
5371 M*/
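/*
   A minimal creation sketch for this type (hypothetical sizes and preallocation values,
   not part of this file): either set the type explicitly, as below, or call
   MatSetFromOptions() and pass -mat_type mpiaij on the command line.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/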
5372 
5373 EXTERN_C_BEGIN
5374 #undef __FUNCT__
5375 #define __FUNCT__ "MatCreate_MPIAIJ"
5376 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5377 {
5378   Mat_MPIAIJ     *b;
5379   PetscErrorCode ierr;
5380   PetscMPIInt    size;
5381 
5382   PetscFunctionBegin;
5383   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5384 
5385   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5386   B->data         = (void*)b;
5387   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5388   B->rmap->bs     = 1;
5389   B->assembled    = PETSC_FALSE;
5390 
5391   B->insertmode   = NOT_SET_VALUES;
5392   b->size         = size;
5393   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5394 
5395   /* build cache for off array entries formed */
5396   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5397   b->donotstash  = PETSC_FALSE;
5398   b->colmap      = 0;
5399   b->garray      = 0;
5400   b->roworiented = PETSC_TRUE;
5401 
5402   /* stuff used for matrix vector multiply */
5403   b->lvec      = PETSC_NULL;
5404   b->Mvctx     = PETSC_NULL;
5405 
5406   /* stuff for MatGetRow() */
5407   b->rowindices   = 0;
5408   b->rowvalues    = 0;
5409   b->getrowactive = PETSC_FALSE;
5410 
5411 #if defined(PETSC_HAVE_SPOOLES)
5412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5413                                      "MatGetFactor_mpiaij_spooles",
5414                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5415 #endif
5416 #if defined(PETSC_HAVE_MUMPS)
5417   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5418                                      "MatGetFactor_aij_mumps",
5419                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5420 #endif
5421 #if defined(PETSC_HAVE_PASTIX)
5422   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5423                                      "MatGetFactor_mpiaij_pastix",
5424                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5425 #endif
5426 #if defined(PETSC_HAVE_SUPERLU_DIST)
5427   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5428                                      "MatGetFactor_mpiaij_superlu_dist",
5429                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5430 #endif
5431   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5432                                      "MatStoreValues_MPIAIJ",
5433                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5434   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5435                                      "MatRetrieveValues_MPIAIJ",
5436                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5437   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5438 				     "MatGetDiagonalBlock_MPIAIJ",
5439                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5440   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5441 				     "MatIsTranspose_MPIAIJ",
5442 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5443   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5444 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5445 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5446   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5447 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5448 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5449   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5450 				     "MatDiagonalScaleLocal_MPIAIJ",
5451 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5452   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5453                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5454                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5455   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5456                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5457                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5458   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5459                                      "MatConvert_MPIAIJ_MPISBAIJ",
5460                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5461   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5462                                      "MatMatMult_MPIDense_MPIAIJ",
5463                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5464   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5465                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5466                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5467   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5468                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5469                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5470   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5471   PetscFunctionReturn(0);
5472 }
5473 EXTERN_C_END
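/*
   Sketch of how the functions composed above are recovered at run time (hypothetical
   caller; the string must match the name used in MatCreate_MPIAIJ() exactly):

     PetscErrorCode (*f)(Mat,MatFactorType,Mat*) = PETSC_NULL;
     ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetFactor_mumps_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) { ierr = (*f)(A,MAT_FACTOR_LU,&F);CHKERRQ(ierr); }

   A PETSC_NULL result simply means the corresponding capability (here, MUMPS) was not
   configured for this matrix.
*/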
5474 
5475 #undef __FUNCT__
5476 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5477 /*@
5478      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5479          and "off-diagonal" part of the matrix in CSR format.
5480 
5481    Collective on MPI_Comm
5482 
5483    Input Parameters:
5484 +  comm - MPI communicator
5485 .  m - number of local rows (Cannot be PETSC_DECIDE)
5486 .  n - This value should be the same as the local size used in creating the
5487        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5488        it calculated if N is given). For square matrices n is almost always m.
5489 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5490 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5491 .   i - row indices for the "diagonal" portion of the matrix
5492 .   j - column indices for the "diagonal" portion of the matrix
5493 .   a - matrix values for the "diagonal" portion of the matrix
5494 .   oi - row indices for the "off-diagonal" portion of the matrix
5495 .   oj - column indices for the "off-diagonal" portion of the matrix
5496 -   oa - matrix values for the "off-diagonal" portion of the matrix
5497 
5498    Output Parameter:
5499 .   mat - the matrix
5500 
5501    Level: advanced
5502 
5503    Notes:
5504        The i, j, a, oi, oj, and oa arrays are NOT copied by this routine into the internal format used by PETSc; they are used in place. The user
5505        must free the arrays once the matrix has been destroyed, and not before.
5506 
5507        The i, j, oi, and oj indices are 0 based
5508 
5509        See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix
5510 
5511        This routine sets only local rows and cannot be used to set off-processor values.
5512 
5513        You cannot later use MatSetValues() to change values in this matrix.
5514 
5515 .keywords: matrix, aij, compressed row, sparse, parallel
5516 
5517 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5518           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5519 @*/
5520 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5521                                                PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5522 {
5523   PetscErrorCode ierr;
5524   Mat_MPIAIJ     *maij;
5525 
5526   PetscFunctionBegin;
5527   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5528   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5529   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5530   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5531   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5532   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5533   maij = (Mat_MPIAIJ*) (*mat)->data;
5534   maij->donotstash     = PETSC_TRUE;
5535   (*mat)->preallocated = PETSC_TRUE;
5536 
5537   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5538   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5539   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5540   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5541 
5542   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5543   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5544 
5545   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5546   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5547   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5548   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5549 
5550   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5551   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5552   PetscFunctionReturn(0);
5553 }
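/*
   A small usage sketch (hypothetical data, two processes, one row each) for the matrix
   [2 -1; -1 2]. Per the code above, the "diagonal" block takes local column indices and
   the "off-diagonal" block takes global column indices:

     rank 0:  i[]  = {0,1};  j[]  = {0};  a[]  = {2.0};
              oi[] = {0,1};  oj[] = {1};  oa[] = {-1.0};
     rank 1:  i[]  = {0,1};  j[]  = {0};  a[]  = {2.0};
              oi[] = {0,1};  oj[] = {0};  oa[] = {-1.0};

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   All six arrays must remain valid until A has been destroyed, since they are used in place.
*/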
5554 
5555 /*
5556     Special version for direct calls from Fortran
5557 */
5558 #include <private/fortranimpl.h>
5559 
5560 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5561 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5562 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5563 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5564 #endif
5565 
5566 /* Change these macros so they can be used in a void function */
5567 #undef CHKERRQ
5568 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5569 #undef SETERRQ2
5570 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5571 #undef SETERRQ
5572 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
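/*
   With the redefinitions above, an error detected inside the void Fortran stub below
   aborts the program (CHKERRQ(ierr) now expands to CHKERRABORT(PETSC_COMM_WORLD,ierr))
   instead of attempting to return an error code, which a void function cannot do.
*/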
5573 
5574 EXTERN_C_BEGIN
5575 #undef __FUNCT__
5576 #define __FUNCT__ "matsetvaluesmpiaij_"
5577 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5578 {
5579   Mat             mat = *mmat;
5580   PetscInt        m = *mm, n = *mn;
5581   InsertMode      addv = *maddv;
5582   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5583   PetscScalar     value;
5584   PetscErrorCode  ierr;
5585 
5586   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5587   if (mat->insertmode == NOT_SET_VALUES) {
5588     mat->insertmode = addv;
5589   }
5590 #if defined(PETSC_USE_DEBUG)
5591   else if (mat->insertmode != addv) {
5592     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5593   }
5594 #endif
5595   {
5596   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5597   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5598   PetscBool       roworiented = aij->roworiented;
5599 
5600   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5601   Mat             A = aij->A;
5602   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5603   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5604   MatScalar       *aa = a->a;
5605   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5606   Mat             B = aij->B;
5607   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5608   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5609   MatScalar       *ba = b->a;
5610 
5611   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5612   PetscInt        nonew = a->nonew;
5613   MatScalar       *ap1,*ap2;
5614 
5615   PetscFunctionBegin;
5616   for (i=0; i<m; i++) {
5617     if (im[i] < 0) continue;
5618 #if defined(PETSC_USE_DEBUG)
5619     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5620 #endif
5621     if (im[i] >= rstart && im[i] < rend) {
5622       row      = im[i] - rstart;
5623       lastcol1 = -1;
5624       rp1      = aj + ai[row];
5625       ap1      = aa + ai[row];
5626       rmax1    = aimax[row];
5627       nrow1    = ailen[row];
5628       low1     = 0;
5629       high1    = nrow1;
5630       lastcol2 = -1;
5631       rp2      = bj + bi[row];
5632       ap2      = ba + bi[row];
5633       rmax2    = bimax[row];
5634       nrow2    = bilen[row];
5635       low2     = 0;
5636       high2    = nrow2;
5637 
5638       for (j=0; j<n; j++) {
5639         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5640         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5641         if (in[j] >= cstart && in[j] < cend){
5642           col = in[j] - cstart;
5643           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5644         } else if (in[j] < 0) continue;
5645 #if defined(PETSC_USE_DEBUG)
5646         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5647 #endif
5648         else {
5649           if (mat->was_assembled) {
5650             if (!aij->colmap) {
5651               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5652             }
5653 #if defined (PETSC_USE_CTABLE)
5654             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5655             col--;
5656 #else
5657             col = aij->colmap[in[j]] - 1;
5658 #endif
5659             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5660               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5661               col =  in[j];
5662               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5663               B = aij->B;
5664               b = (Mat_SeqAIJ*)B->data;
5665               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5666               rp2      = bj + bi[row];
5667               ap2      = ba + bi[row];
5668               rmax2    = bimax[row];
5669               nrow2    = bilen[row];
5670               low2     = 0;
5671               high2    = nrow2;
5672               bm       = aij->B->rmap->n;
5673               ba = b->a;
5674             }
5675           } else col = in[j];
5676           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5677         }
5678       }
5679     } else {
5680       if (!aij->donotstash) {
5681         if (roworiented) {
5682           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5683         } else {
5684           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5685         }
5686       }
5687     }
5688   }}
5689   PetscFunctionReturnVoid();
5690 }
5691 EXTERN_C_END
5692 
5693