xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c5b2ea42cd4070151795b0df3c8c47a133418b2b)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
7 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
8 {
9   PetscErrorCode  ierr;
10   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
11   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
12   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
13   const PetscInt  *ia,*ib;
14   const MatScalar *aa,*bb;
15   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
16   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
17 
18   PetscFunctionBegin;
19   *keptrows = 0;
20   ia = a->i;
21   ib = b->i;
22   for (i=0; i<m; i++) {
23     na = ia[i+1] - ia[i];
24     nb = ib[i+1] - ib[i];
25     if (!na && !nb) {
26       cnt++;
27       goto ok1;
28     }
29     aa = a->a + ia[i];
30     for (j=0; j<na; j++) {
31       if (aa[j] != 0.0) goto ok1;
32     }
33     bb = b->a + ib[i];
34     for (j=0; j <nb; j++) {
35       if (bb[j] != 0.0) goto ok1;
36     }
37     cnt++;
38     ok1:;
39   }
40   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
41   if (!n0rows) PetscFunctionReturn(0);
42   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
43   cnt = 0;
44   for (i=0; i<m; i++) {
45     na = ia[i+1] - ia[i];
46     nb = ib[i+1] - ib[i];
47     if (!na && !nb) continue;
48     aa = a->a + ia[i];
49     for(j=0; j<na;j++) {
50       if (aa[j] != 0.0) {
51         rows[cnt++] = rstart + i;
52         goto ok2;
53       }
54     }
55     bb = b->a + ib[i];
56     for (j=0; j<nb; j++) {
57       if (bb[j] != 0.0) {
58         rows[cnt++] = rstart + i;
59         goto ok2;
60       }
61     }
62     ok2:;
63   }
64   ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "MatDistribute_MPIAIJ"
70 /*
71     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
72     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
73 
74     Only for square matrices
75 */
76 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
77 {
78   PetscMPIInt    rank,size;
79   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
80   PetscErrorCode ierr;
81   Mat            mat;
82   Mat_SeqAIJ     *gmata;
83   PetscMPIInt    tag;
84   MPI_Status     status;
85   PetscBool      aij;
86   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
87 
88   PetscFunctionBegin;
89   CHKMEMQ;
90   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
91   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
92   if (!rank) {
93     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
94     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
95   }
96   if (reuse == MAT_INITIAL_MATRIX) {
97     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
98     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
99     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
100     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
101     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
102     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
103     rowners[0] = 0;
104     for (i=2; i<=size; i++) {
105       rowners[i] += rowners[i-1];
106     }
107     rstart = rowners[rank];
108     rend   = rowners[rank+1];
109     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
110     if (!rank) {
111       gmata = (Mat_SeqAIJ*) gmat->data;
112       /* send row lengths to all processors */
113       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
114       for (i=1; i<size; i++) {
115 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
116       }
117       /* determine number diagonal and off-diagonal counts */
118       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
119       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
120       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
121       jj = 0;
122       for (i=0; i<m; i++) {
123 	for (j=0; j<dlens[i]; j++) {
124           if (gmata->j[jj] < rstart) ld[i]++;
125 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
126 	  jj++;
127 	}
128       }
129       /* send column indices to other processes */
130       for (i=1; i<size; i++) {
131 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
132 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
133 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
134       }
135 
136       /* send numerical values to other processes */
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       gmataa = gmata->a;
142       gmataj = gmata->j;
143 
144     } else {
145       /* receive row lengths */
146       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
147       /* receive column indices */
148       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
149       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
150       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
151       /* determine number diagonal and off-diagonal counts */
152       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
153       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
154       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
155       jj = 0;
156       for (i=0; i<m; i++) {
157 	for (j=0; j<dlens[i]; j++) {
158           if (gmataj[jj] < rstart) ld[i]++;
159 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
160 	  jj++;
161 	}
162       }
163       /* receive numerical values */
164       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
165       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
166     }
167     /* set preallocation */
168     for (i=0; i<m; i++) {
169       dlens[i] -= olens[i];
170     }
171     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
172     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
173 
174     for (i=0; i<m; i++) {
175       dlens[i] += olens[i];
176     }
177     cnt  = 0;
178     for (i=0; i<m; i++) {
179       row  = rstart + i;
180       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
181       cnt += dlens[i];
182     }
183     if (rank) {
184       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
185     }
186     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
187     ierr = PetscFree(rowners);CHKERRQ(ierr);
188     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
189     *inmat = mat;
190   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
191     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
192     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
193     mat   = *inmat;
194     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
195     if (!rank) {
196       /* send numerical values to other processes */
197       gmata = (Mat_SeqAIJ*) gmat->data;
198       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
199       gmataa = gmata->a;
200       for (i=1; i<size; i++) {
201         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
202         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
203       }
204       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
205     } else {
206       /* receive numerical values from process 0*/
207       nz   = Ad->nz + Ao->nz;
208       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
209       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
210     }
211     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
212     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
213     ad = Ad->a;
214     ao = Ao->a;
215     if (mat->rmap->n) {
216       i  = 0;
217       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
218       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
219     }
220     for (i=1; i<mat->rmap->n; i++) {
221       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
222       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
223     }
224     i--;
225     if (mat->rmap->n) {
226       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
227     }
228     if (rank) {
229       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
230     }
231   }
232   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234   CHKMEMQ;
235   PetscFunctionReturn(0);
236 }
237 
238 /*
239   Local utility routine that creates a mapping from the global column
240 number to the local number in the off-diagonal part of the local
241 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
242 a slightly higher hash table cost; without it it is not scalable (each processor
243 has an order N integer array but is fast to acess.
244 */
245 #undef __FUNCT__
246 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
247 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
248 {
249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
250   PetscErrorCode ierr;
251   PetscInt       n = aij->B->cmap->n,i;
252 
253   PetscFunctionBegin;
254 #if defined (PETSC_USE_CTABLE)
255   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
256   for (i=0; i<n; i++){
257     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
258   }
259 #else
260   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
261   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
262   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
263   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
264 #endif
265   PetscFunctionReturn(0);
266 }
267 
268 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
269 { \
270     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
271     lastcol1 = col;\
272     while (high1-low1 > 5) { \
273       t = (low1+high1)/2; \
274       if (rp1[t] > col) high1 = t; \
275       else             low1  = t; \
276     } \
277       for (_i=low1; _i<high1; _i++) { \
278         if (rp1[_i] > col) break; \
279         if (rp1[_i] == col) { \
280           if (addv == ADD_VALUES) ap1[_i] += value;   \
281           else                    ap1[_i] = value; \
282           goto a_noinsert; \
283         } \
284       }  \
285       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
286       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
287       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
288       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
289       N = nrow1++ - 1; a->nz++; high1++; \
290       /* shift up all the later entries in this row */ \
291       for (ii=N; ii>=_i; ii--) { \
292         rp1[ii+1] = rp1[ii]; \
293         ap1[ii+1] = ap1[ii]; \
294       } \
295       rp1[_i] = col;  \
296       ap1[_i] = value;  \
297       a_noinsert: ; \
298       ailen[row] = nrow1; \
299 }
300 
301 
302 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
303 { \
304     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
305     lastcol2 = col;\
306     while (high2-low2 > 5) { \
307       t = (low2+high2)/2; \
308       if (rp2[t] > col) high2 = t; \
309       else             low2  = t; \
310     } \
311     for (_i=low2; _i<high2; _i++) {		\
312       if (rp2[_i] > col) break;			\
313       if (rp2[_i] == col) {			      \
314 	if (addv == ADD_VALUES) ap2[_i] += value;     \
315 	else                    ap2[_i] = value;      \
316 	goto b_noinsert;			      \
317       }						      \
318     }							      \
319     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
320     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
321     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
322     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
323     N = nrow2++ - 1; b->nz++; high2++;					\
324     /* shift up all the later entries in this row */			\
325     for (ii=N; ii>=_i; ii--) {						\
326       rp2[ii+1] = rp2[ii];						\
327       ap2[ii+1] = ap2[ii];						\
328     }									\
329     rp2[_i] = col;							\
330     ap2[_i] = value;							\
331     b_noinsert: ;								\
332     bilen[row] = nrow2;							\
333 }
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
337 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
338 {
339   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
340   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
341   PetscErrorCode ierr;
342   PetscInt       l,*garray = mat->garray,diag;
343 
344   PetscFunctionBegin;
345   /* code only works for square matrices A */
346 
347   /* find size of row to the left of the diagonal part */
348   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
349   row  = row - diag;
350   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
351     if (garray[b->j[b->i[row]+l]] > diag) break;
352   }
353   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
354 
355   /* diagonal part */
356   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
357 
358   /* right of diagonal part */
359   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatSetValues_MPIAIJ"
365 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
366 {
367   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
368   PetscScalar    value;
369   PetscErrorCode ierr;
370   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
371   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
372   PetscBool      roworiented = aij->roworiented;
373 
374   /* Some Variables required in the macro */
375   Mat            A = aij->A;
376   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
377   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
378   MatScalar      *aa = a->a;
379   PetscBool      ignorezeroentries = a->ignorezeroentries;
380   Mat            B = aij->B;
381   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
382   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
383   MatScalar      *ba = b->a;
384 
385   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
386   PetscInt       nonew = a->nonew;
387   MatScalar      *ap1,*ap2;
388 
389   PetscFunctionBegin;
390   if (v) PetscValidScalarPointer(v,6);
391   for (i=0; i<m; i++) {
392     if (im[i] < 0) continue;
393 #if defined(PETSC_USE_DEBUG)
394     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
395 #endif
396     if (im[i] >= rstart && im[i] < rend) {
397       row      = im[i] - rstart;
398       lastcol1 = -1;
399       rp1      = aj + ai[row];
400       ap1      = aa + ai[row];
401       rmax1    = aimax[row];
402       nrow1    = ailen[row];
403       low1     = 0;
404       high1    = nrow1;
405       lastcol2 = -1;
406       rp2      = bj + bi[row];
407       ap2      = ba + bi[row];
408       rmax2    = bimax[row];
409       nrow2    = bilen[row];
410       low2     = 0;
411       high2    = nrow2;
412 
413       for (j=0; j<n; j++) {
414         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
415         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
416         if (in[j] >= cstart && in[j] < cend){
417           col = in[j] - cstart;
418           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_DEBUG)
421         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!aij->colmap) {
426               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
430 	    col--;
431 #else
432             col = aij->colmap[in[j]] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
435               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
438               B = aij->B;
439               b = (Mat_SeqAIJ*)B->data;
440               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
441               rp2      = bj + bi[row];
442               ap2      = ba + bi[row];
443               rmax2    = bimax[row];
444               nrow2    = bilen[row];
445               low2     = 0;
446               high2    = nrow2;
447               bm       = aij->B->rmap->n;
448               ba = b->a;
449             }
450           } else col = in[j];
451           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
452         }
453       }
454     } else {
455       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
456       if (!aij->donotstash) {
457         if (roworiented) {
458           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
459         } else {
460           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
461         }
462       }
463     }
464   }
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatGetValues_MPIAIJ"
470 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
471 {
472   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
473   PetscErrorCode ierr;
474   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
475   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
476 
477   PetscFunctionBegin;
478   for (i=0; i<m; i++) {
479     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
480     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
481     if (idxm[i] >= rstart && idxm[i] < rend) {
482       row = idxm[i] - rstart;
483       for (j=0; j<n; j++) {
484         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
485         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
486         if (idxn[j] >= cstart && idxn[j] < cend){
487           col = idxn[j] - cstart;
488           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
489         } else {
490           if (!aij->colmap) {
491             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
492           }
493 #if defined (PETSC_USE_CTABLE)
494           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
495           col --;
496 #else
497           col = aij->colmap[idxn[j]] - 1;
498 #endif
499           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
500           else {
501             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
502           }
503         }
504       }
505     } else {
506       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
507     }
508   }
509   PetscFunctionReturn(0);
510 }
511 
512 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
516 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
517 {
518   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
519   PetscErrorCode ierr;
520   PetscInt       nstash,reallocs;
521   InsertMode     addv;
522 
523   PetscFunctionBegin;
524   if (aij->donotstash || mat->nooffprocentries) {
525     PetscFunctionReturn(0);
526   }
527 
528   /* make sure all processors are either in INSERTMODE or ADDMODE */
529   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
530   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
531   mat->insertmode = addv; /* in case this processor had no cache */
532 
533   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
534   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
535   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
541 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
542 {
543   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
544   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
545   PetscErrorCode ierr;
546   PetscMPIInt    n;
547   PetscInt       i,j,rstart,ncols,flg;
548   PetscInt       *row,*col;
549   PetscBool      other_disassembled;
550   PetscScalar    *val;
551   InsertMode     addv = mat->insertmode;
552 
553   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
554   PetscFunctionBegin;
555   if (!aij->donotstash && !mat->nooffprocentries) {
556     while (1) {
557       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
558       if (!flg) break;
559 
560       for (i=0; i<n;) {
561         /* Now identify the consecutive vals belonging to the same row */
562         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
563         if (j < n) ncols = j-i;
564         else       ncols = n-i;
565         /* Now assemble all these values with a single function call */
566         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
567         i = j;
568       }
569     }
570     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
571   }
572   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
573   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
574 
575   /* determine if any processor has disassembled, if so we must
576      also disassemble ourselfs, in order that we may reassemble. */
577   /*
578      if nonzero structure of submatrix B cannot change then we know that
579      no processor disassembled thus we can skip this stuff
580   */
581   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
582     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
583     if (mat->was_assembled && !other_disassembled) {
584       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
585     }
586   }
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
589   }
590   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
591   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
592   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
596   aij->rowvalues = 0;
597 
598   /* used by MatAXPY() */
599   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
600   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
601 
602   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
603   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
609 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
610 {
611   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
612   PetscErrorCode ierr;
613 
614   PetscFunctionBegin;
615   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
616   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatZeroRows_MPIAIJ"
622 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
623 {
624   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
625   PetscErrorCode    ierr;
626   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
627   PetscInt          i,*owners = A->rmap->range;
628   PetscInt          *nprocs,j,idx,nsends,row;
629   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
630   PetscInt          *rvalues,count,base,slen,*source;
631   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
632   MPI_Comm          comm = ((PetscObject)A)->comm;
633   MPI_Request       *send_waits,*recv_waits;
634   MPI_Status        recv_status,*send_status;
635   const PetscScalar *xx;
636   PetscScalar       *bb;
637 #if defined(PETSC_DEBUG)
638   PetscBool      found = PETSC_FALSE;
639 #endif
640 
641   PetscFunctionBegin;
642   /*  first count number of contributors to each processor */
643   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
644   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
645   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
646   j = 0;
647   for (i=0; i<N; i++) {
648     if (lastidx > (idx = rows[i])) j = 0;
649     lastidx = idx;
650     for (; j<size; j++) {
651       if (idx >= owners[j] && idx < owners[j+1]) {
652         nprocs[2*j]++;
653         nprocs[2*j+1] = 1;
654         owner[i] = j;
655 #if defined(PETSC_DEBUG)
656         found = PETSC_TRUE;
657 #endif
658         break;
659       }
660     }
661 #if defined(PETSC_DEBUG)
662     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
663     found = PETSC_FALSE;
664 #endif
665   }
666   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
667 
668   if (A->nooffproczerorows) {
669     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
670     nrecvs = nsends;
671     nmax   = N;
672   } else {
673     /* inform other processors of number of messages and max length*/
674     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
675   }
676 
677   /* post receives:   */
678   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
679   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
680   for (i=0; i<nrecvs; i++) {
681     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
682   }
683 
684   /* do sends:
685       1) starts[i] gives the starting index in svalues for stuff going to
686          the ith processor
687   */
688   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
689   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
690   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
691   starts[0] = 0;
692   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
693   for (i=0; i<N; i++) {
694     svalues[starts[owner[i]]++] = rows[i];
695   }
696 
697   starts[0] = 0;
698   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
699   count = 0;
700   for (i=0; i<size; i++) {
701     if (nprocs[2*i+1]) {
702       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
703     }
704   }
705   ierr = PetscFree(starts);CHKERRQ(ierr);
706 
707   base = owners[rank];
708 
709   /*  wait on receives */
710   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
711   count  = nrecvs; slen = 0;
712   while (count) {
713     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
714     /* unpack receives into our local space */
715     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
716     source[imdex]  = recv_status.MPI_SOURCE;
717     lens[imdex]    = n;
718     slen          += n;
719     count--;
720   }
721   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
722 
723   /* move the data into the send scatter */
724   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
725   count = 0;
726   for (i=0; i<nrecvs; i++) {
727     values = rvalues + i*nmax;
728     for (j=0; j<lens[i]; j++) {
729       lrows[count++] = values[j] - base;
730     }
731   }
732   ierr = PetscFree(rvalues);CHKERRQ(ierr);
733   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
734   ierr = PetscFree(owner);CHKERRQ(ierr);
735   ierr = PetscFree(nprocs);CHKERRQ(ierr);
736 
737   /* fix right hand side if needed */
738   if (x && b) {
739     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
740     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
741     for (i=0; i<slen; i++) {
742       bb[lrows[i]] = diag*xx[lrows[i]];
743     }
744     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
745     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
746   }
747   /*
748         Zero the required rows. If the "diagonal block" of the matrix
749      is square and the user wishes to set the diagonal we use separate
750      code so that MatSetValues() is not called for each diagonal allocating
751      new memory, thus calling lots of mallocs and slowing things down.
752 
753   */
754   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
755   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
756   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
757     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
758   } else if (diag != 0.0) {
759     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
760     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
761       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
762 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
763     }
764     for (i = 0; i < slen; i++) {
765       row  = lrows[i] + rstart;
766       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
767     }
768     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
769     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
770   } else {
771     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
772   }
773   ierr = PetscFree(lrows);CHKERRQ(ierr);
774 
775   /* wait on sends */
776   if (nsends) {
777     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
778     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
779     ierr = PetscFree(send_status);CHKERRQ(ierr);
780   }
781   ierr = PetscFree(send_waits);CHKERRQ(ierr);
782   ierr = PetscFree(svalues);CHKERRQ(ierr);
783 
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
789 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
790 {
791   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
792   PetscErrorCode    ierr;
793   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
794   PetscInt          i,*owners = A->rmap->range;
795   PetscInt          *nprocs,j,idx,nsends;
796   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
797   PetscInt          *rvalues,count,base,slen,*source;
798   PetscInt          *lens,*lrows,*values,m;
799   MPI_Comm          comm = ((PetscObject)A)->comm;
800   MPI_Request       *send_waits,*recv_waits;
801   MPI_Status        recv_status,*send_status;
802   const PetscScalar *xx;
803   PetscScalar       *bb,*mask;
804   Vec               xmask,lmask;
805   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
806   const PetscInt    *aj, *ii,*ridx;
807   PetscScalar       *aa;
808 #if defined(PETSC_DEBUG)
809   PetscBool         found = PETSC_FALSE;
810 #endif
811 
812   PetscFunctionBegin;
813   /*  first count number of contributors to each processor */
814   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
815   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
816   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
817   j = 0;
818   for (i=0; i<N; i++) {
819     if (lastidx > (idx = rows[i])) j = 0;
820     lastidx = idx;
821     for (; j<size; j++) {
822       if (idx >= owners[j] && idx < owners[j+1]) {
823         nprocs[2*j]++;
824         nprocs[2*j+1] = 1;
825         owner[i] = j;
826 #if defined(PETSC_DEBUG)
827         found = PETSC_TRUE;
828 #endif
829         break;
830       }
831     }
832 #if defined(PETSC_DEBUG)
833     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
834     found = PETSC_FALSE;
835 #endif
836   }
837   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
838 
839   /* inform other processors of number of messages and max length*/
840   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
841 
842   /* post receives:   */
843   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
844   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
845   for (i=0; i<nrecvs; i++) {
846     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
847   }
848 
849   /* do sends:
850       1) starts[i] gives the starting index in svalues for stuff going to
851          the ith processor
852   */
853   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
854   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
855   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
856   starts[0] = 0;
857   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
858   for (i=0; i<N; i++) {
859     svalues[starts[owner[i]]++] = rows[i];
860   }
861 
862   starts[0] = 0;
863   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
864   count = 0;
865   for (i=0; i<size; i++) {
866     if (nprocs[2*i+1]) {
867       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
868     }
869   }
870   ierr = PetscFree(starts);CHKERRQ(ierr);
871 
872   base = owners[rank];
873 
874   /*  wait on receives */
875   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
876   count  = nrecvs; slen = 0;
877   while (count) {
878     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
879     /* unpack receives into our local space */
880     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
881     source[imdex]  = recv_status.MPI_SOURCE;
882     lens[imdex]    = n;
883     slen          += n;
884     count--;
885   }
886   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
887 
888   /* move the data into the send scatter */
889   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
890   count = 0;
891   for (i=0; i<nrecvs; i++) {
892     values = rvalues + i*nmax;
893     for (j=0; j<lens[i]; j++) {
894       lrows[count++] = values[j] - base;
895     }
896   }
897   ierr = PetscFree(rvalues);CHKERRQ(ierr);
898   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
899   ierr = PetscFree(owner);CHKERRQ(ierr);
900   ierr = PetscFree(nprocs);CHKERRQ(ierr);
901   /* lrows are the local rows to be zeroed, slen is the number of local rows */
902 
903   /* zero diagonal part of matrix */
904   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
905 
906   /* handle off diagonal part of matrix */
907   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
908   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
909   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
910   for (i=0; i<slen; i++) {
911     bb[lrows[i]] = 1;
912   }
913   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
914   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecDestroy(xmask);CHKERRQ(ierr);
917   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
921   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
922 
923   /* remove zeroed rows of off diagonal matrix */
924   ii = aij->i;
925   for (i=0; i<slen; i++) {
926     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
927   }
928 
929   /* loop over all elements of off process part of matrix zeroing removed columns*/
930   if (aij->compressedrow.use){
931     m    = aij->compressedrow.nrows;
932     ii   = aij->compressedrow.i;
933     ridx = aij->compressedrow.rindex;
934     for (i=0; i<m; i++){
935       n   = ii[i+1] - ii[i];
936       aj  = aij->j + ii[i];
937       aa  = aij->a + ii[i];
938 
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           bb[*ridx] -= *aa*xx[*aj];
942           *aa        = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947       ridx++;
948     }
949   } else { /* do not use compressed row format */
950     m = l->B->rmap->n;
951    for (i=0; i<m; i++) {
952      n   = ii[i+1] - ii[i];
953      aj  = aij->j + ii[i];
954      aa  = aij->a + ii[i];
955      for (j=0; j<n; j++) {
956         if (PetscAbsScalar(mask[*aj])) {
957           bb[i] -= *aa*xx[*aj];
958           *aa    = 0.0;
959         }
960         aa++;
961         aj++;
962       }
963     }
964   }
965 
966   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
967   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
968   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   ierr = VecDestroy(lmask);CHKERRQ(ierr);
970   ierr = PetscFree(lrows);CHKERRQ(ierr);
971 
972   /* wait on sends */
973   if (nsends) {
974     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
975     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
976     ierr = PetscFree(send_status);CHKERRQ(ierr);
977   }
978   ierr = PetscFree(send_waits);CHKERRQ(ierr);
979   ierr = PetscFree(svalues);CHKERRQ(ierr);
980 
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMult_MPIAIJ"
986 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
987 {
988   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode ierr;
990   PetscInt       nt;
991 
992   PetscFunctionBegin;
993   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
994   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
995   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
997   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1004 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1016 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1017 {
1018   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1019   PetscErrorCode ierr;
1020 
1021   PetscFunctionBegin;
1022   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1024   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1031 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1032 {
1033   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1034   PetscErrorCode ierr;
1035   PetscBool      merged;
1036 
1037   PetscFunctionBegin;
1038   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1039   /* do nondiagonal part */
1040   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1041   if (!merged) {
1042     /* send it on its way */
1043     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1044     /* do local part */
1045     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1046     /* receive remote parts: note this assumes the values are not actually */
1047     /* added in yy until the next line, */
1048     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1049   } else {
1050     /* do local part */
1051     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1052     /* send it on its way */
1053     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054     /* values actually were received in the Begin() but we need to call this nop */
1055     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056   }
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 EXTERN_C_BEGIN
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1063 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1064 {
1065   MPI_Comm       comm;
1066   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1067   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1068   IS             Me,Notme;
1069   PetscErrorCode ierr;
1070   PetscInt       M,N,first,last,*notme,i;
1071   PetscMPIInt    size;
1072 
1073   PetscFunctionBegin;
1074 
1075   /* Easy test: symmetric diagonal block */
1076   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1077   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1078   if (!*f) PetscFunctionReturn(0);
1079   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1081   if (size == 1) PetscFunctionReturn(0);
1082 
1083   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1084   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1085   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1086   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1087   for (i=0; i<first; i++) notme[i] = i;
1088   for (i=last; i<M; i++) notme[i-last+first] = i;
1089   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1090   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1091   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1092   Aoff = Aoffs[0];
1093   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1094   Boff = Boffs[0];
1095   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1096   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1097   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1098   ierr = ISDestroy(Me);CHKERRQ(ierr);
1099   ierr = ISDestroy(Notme);CHKERRQ(ierr);
1100 
1101   PetscFunctionReturn(0);
1102 }
1103 EXTERN_C_END
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1107 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   /* do nondiagonal part */
1114   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1115   /* send it on its way */
1116   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1117   /* do local part */
1118   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1119   /* receive remote parts */
1120   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 /*
1125   This only works correctly for square matrices where the subblock A->A is the
1126    diagonal block
1127 */
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1130 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1131 {
1132   PetscErrorCode ierr;
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134 
1135   PetscFunctionBegin;
1136   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1137   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1138   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatScale_MPIAIJ"
1144 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1145 {
1146   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1147   PetscErrorCode ierr;
1148 
1149   PetscFunctionBegin;
1150   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1151   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatDestroy_MPIAIJ"
1157 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1158 {
1159   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163 #if defined(PETSC_USE_LOG)
1164   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1165 #endif
1166   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1167   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
1168   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
1169   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
1170 #if defined (PETSC_USE_CTABLE)
1171   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
1172 #else
1173   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1174 #endif
1175   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1176   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
1177   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
1178   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1179   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1180   ierr = PetscFree(aij);CHKERRQ(ierr);
1181 
1182   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1196 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1197 {
1198   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1199   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1200   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1201   PetscErrorCode    ierr;
1202   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1203   int               fd;
1204   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1205   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1206   PetscScalar       *column_values;
1207   PetscInt          message_count,flowcontrolcount;
1208 
1209   PetscFunctionBegin;
1210   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1211   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1212   nz   = A->nz + B->nz;
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1218     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) {
1224       rlen = PetscMax(rlen,range[i+1] - range[i]);
1225     }
1226   } else {
1227     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1228     rlen = mat->rmap->n;
1229   }
1230 
1231   /* load up the local row counts */
1232   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1233   for (i=0; i<mat->rmap->n; i++) {
1234     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1235   }
1236 
1237   /* store the row lengths to the file */
1238   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1239   if (!rank) {
1240     MPI_Status status;
1241     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     for (i=1; i<size; i++) {
1243       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1244       rlen = range[i+1] - range[i];
1245       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1246       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1247     }
1248     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1249   } else {
1250     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1251     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1255 
1256   /* load up the local column indices */
1257   nzmax = nz; /* )th processor needs space a largest processor needs */
1258   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1259   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1260   cnt  = 0;
1261   for (i=0; i<mat->rmap->n; i++) {
1262     for (j=B->i[i]; j<B->i[i+1]; j++) {
1263       if ( (col = garray[B->j[j]]) > cstart) break;
1264       column_indices[cnt++] = col;
1265     }
1266     for (k=A->i[i]; k<A->i[i+1]; k++) {
1267       column_indices[cnt++] = A->j[k] + cstart;
1268     }
1269     for (; j<B->i[i+1]; j++) {
1270       column_indices[cnt++] = garray[B->j[j]];
1271     }
1272   }
1273   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1274 
1275   /* store the column indices to the file */
1276    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1277   if (!rank) {
1278     MPI_Status status;
1279     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1280     for (i=1; i<size; i++) {
1281       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1282       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1283       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1284       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1290     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1291     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1292     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1293   }
1294   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1295 
1296   /* load up the local column values */
1297   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1298   cnt  = 0;
1299   for (i=0; i<mat->rmap->n; i++) {
1300     for (j=B->i[i]; j<B->i[i+1]; j++) {
1301       if ( garray[B->j[j]] > cstart) break;
1302       column_values[cnt++] = B->a[j];
1303     }
1304     for (k=A->i[i]; k<A->i[i+1]; k++) {
1305       column_values[cnt++] = A->a[k];
1306     }
1307     for (; j<B->i[i+1]; j++) {
1308       column_values[cnt++] = B->a[j];
1309     }
1310   }
1311   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1312 
1313   /* store the column values to the file */
1314    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1315   if (!rank) {
1316     MPI_Status status;
1317     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1318     for (i=1; i<size; i++) {
1319        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1320       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1321       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1322       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1323       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1324     }
1325     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1326   } else {
1327     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1328     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1329     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1331   }
1332   ierr = PetscFree(column_values);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1338 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1339 {
1340   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1341   PetscErrorCode    ierr;
1342   PetscMPIInt       rank = aij->rank,size = aij->size;
1343   PetscBool         isdraw,iascii,isbinary;
1344   PetscViewer       sviewer;
1345   PetscViewerFormat format;
1346 
1347   PetscFunctionBegin;
1348   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1349   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1350   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1351   if (iascii) {
1352     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1353     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1354       MatInfo    info;
1355       PetscBool  inodes;
1356 
1357       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1358       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1359       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1361       if (!inodes) {
1362         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1363 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1364       } else {
1365         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1366 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1367       }
1368       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1369       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1370       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1371       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1372       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1373       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1374       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1375       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1376       PetscFunctionReturn(0);
1377     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1378       PetscInt   inodecount,inodelimit,*inodes;
1379       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1380       if (inodes) {
1381         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1382       } else {
1383         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1384       }
1385       PetscFunctionReturn(0);
1386     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1387       PetscFunctionReturn(0);
1388     }
1389   } else if (isbinary) {
1390     if (size == 1) {
1391       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1392       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1393     } else {
1394       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1395     }
1396     PetscFunctionReturn(0);
1397   } else if (isdraw) {
1398     PetscDraw  draw;
1399     PetscBool  isnull;
1400     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1401     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1402   }
1403 
1404   if (size == 1) {
1405     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1406     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1407   } else {
1408     /* assemble the entire matrix onto first processor. */
1409     Mat         A;
1410     Mat_SeqAIJ  *Aloc;
1411     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1412     MatScalar   *a;
1413 
1414     if (mat->rmap->N > 1024) {
1415       PetscBool  flg = PETSC_FALSE;
1416 
1417       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1418       if (!flg) {
1419         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1420       }
1421     }
1422 
1423     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1424     if (!rank) {
1425       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1426     } else {
1427       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1428     }
1429     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1430     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1431     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1432     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1433 
1434     /* copy over the A part */
1435     Aloc = (Mat_SeqAIJ*)aij->A->data;
1436     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1437     row = mat->rmap->rstart;
1438     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1439     for (i=0; i<m; i++) {
1440       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1441       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1442     }
1443     aj = Aloc->j;
1444     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1445 
1446     /* copy over the B part */
1447     Aloc = (Mat_SeqAIJ*)aij->B->data;
1448     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1449     row  = mat->rmap->rstart;
1450     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1451     ct   = cols;
1452     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1453     for (i=0; i<m; i++) {
1454       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1455       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1456     }
1457     ierr = PetscFree(ct);CHKERRQ(ierr);
1458     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1459     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1460     /*
1461        Everyone has to call to draw the matrix since the graphics waits are
1462        synchronized across all processors that share the PetscDraw object
1463     */
1464     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1465     if (!rank) {
1466       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1467       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1468       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1469       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1470     }
1471     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1472     ierr = MatDestroy(A);CHKERRQ(ierr);
1473   }
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatView_MPIAIJ"
1479 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1480 {
1481   PetscErrorCode ierr;
1482   PetscBool      iascii,isdraw,issocket,isbinary;
1483 
1484   PetscFunctionBegin;
1485   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1486   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1487   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1488   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1489   if (iascii || isdraw || isbinary || issocket) {
1490     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1491   } else {
1492     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1493   }
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatSOR_MPIAIJ"
1499 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1500 {
1501   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1502   PetscErrorCode ierr;
1503   Vec            bb1 = 0;
1504   PetscBool      hasop;
1505 
1506   PetscFunctionBegin;
1507   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1508     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1509   }
1510 
1511   if (flag == SOR_APPLY_UPPER) {
1512     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1513     PetscFunctionReturn(0);
1514   }
1515 
1516   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1517     if (flag & SOR_ZERO_INITIAL_GUESS) {
1518       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1519       its--;
1520     }
1521 
1522     while (its--) {
1523       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1524       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1525 
1526       /* update rhs: bb1 = bb - B*x */
1527       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1528       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1529 
1530       /* local sweep */
1531       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1532     }
1533   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1534     if (flag & SOR_ZERO_INITIAL_GUESS) {
1535       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1536       its--;
1537     }
1538     while (its--) {
1539       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1540       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1541 
1542       /* update rhs: bb1 = bb - B*x */
1543       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1544       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1545 
1546       /* local sweep */
1547       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1548     }
1549   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1550     if (flag & SOR_ZERO_INITIAL_GUESS) {
1551       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1552       its--;
1553     }
1554     while (its--) {
1555       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557 
1558       /* update rhs: bb1 = bb - B*x */
1559       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1560       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1561 
1562       /* local sweep */
1563       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1564     }
1565   }  else if (flag & SOR_EISENSTAT) {
1566     Vec         xx1;
1567 
1568     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1569     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1570 
1571     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1572     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573     if (!mat->diag) {
1574       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1575       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1576     }
1577     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1578     if (hasop) {
1579       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1580     } else {
1581       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1582     }
1583     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1584 
1585     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1586 
1587     /* local sweep */
1588     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1589     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1590     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1591   } else {
1592     SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1593   }
1594 
1595   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatPermute_MPIAIJ"
1601 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1602 {
1603   MPI_Comm       comm,pcomm;
1604   PetscInt       first,local_size,nrows;
1605   const PetscInt *rows;
1606   PetscMPIInt    size;
1607   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1608   PetscErrorCode ierr;
1609 
1610   PetscFunctionBegin;
1611   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1612   /* make a collective version of 'rowp' */
1613   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1614   if (pcomm==comm) {
1615     crowp = rowp;
1616   } else {
1617     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1618     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1619     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1620     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1621   }
1622   /* collect the global row permutation and invert it */
1623   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1624   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1625   if (pcomm!=comm) {
1626     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1627   }
1628   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1629   /* get the local target indices */
1630   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1631   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1632   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1633   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1634   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1635   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1636   /* the column permutation is so much easier;
1637      make a local version of 'colp' and invert it */
1638   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1639   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1640   if (size==1) {
1641     lcolp = colp;
1642   } else {
1643     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1644     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1645     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1646   }
1647   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1648   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1649   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1650   if (size>1) {
1651     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1652     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1653   }
1654   /* now we just get the submatrix */
1655   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1656   /* clean up */
1657   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1658   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 #undef __FUNCT__
1663 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1664 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1665 {
1666   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1667   Mat            A = mat->A,B = mat->B;
1668   PetscErrorCode ierr;
1669   PetscReal      isend[5],irecv[5];
1670 
1671   PetscFunctionBegin;
1672   info->block_size     = 1.0;
1673   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1674   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1675   isend[3] = info->memory;  isend[4] = info->mallocs;
1676   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1677   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1678   isend[3] += info->memory;  isend[4] += info->mallocs;
1679   if (flag == MAT_LOCAL) {
1680     info->nz_used      = isend[0];
1681     info->nz_allocated = isend[1];
1682     info->nz_unneeded  = isend[2];
1683     info->memory       = isend[3];
1684     info->mallocs      = isend[4];
1685   } else if (flag == MAT_GLOBAL_MAX) {
1686     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1687     info->nz_used      = irecv[0];
1688     info->nz_allocated = irecv[1];
1689     info->nz_unneeded  = irecv[2];
1690     info->memory       = irecv[3];
1691     info->mallocs      = irecv[4];
1692   } else if (flag == MAT_GLOBAL_SUM) {
1693     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1694     info->nz_used      = irecv[0];
1695     info->nz_allocated = irecv[1];
1696     info->nz_unneeded  = irecv[2];
1697     info->memory       = irecv[3];
1698     info->mallocs      = irecv[4];
1699   }
1700   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1701   info->fill_ratio_needed = 0;
1702   info->factor_mallocs    = 0;
1703 
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatSetOption_MPIAIJ"
1709 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1710 {
1711   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   switch (op) {
1716   case MAT_NEW_NONZERO_LOCATIONS:
1717   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1718   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1719   case MAT_KEEP_NONZERO_PATTERN:
1720   case MAT_NEW_NONZERO_LOCATION_ERR:
1721   case MAT_USE_INODES:
1722   case MAT_IGNORE_ZERO_ENTRIES:
1723     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1724     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1725     break;
1726   case MAT_ROW_ORIENTED:
1727     a->roworiented = flg;
1728     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1729     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1730     break;
1731   case MAT_NEW_DIAGONALS:
1732     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1733     break;
1734   case MAT_IGNORE_OFF_PROC_ENTRIES:
1735     a->donotstash = PETSC_TRUE;
1736     break;
1737   case MAT_SPD:
1738     A->spd_set                         = PETSC_TRUE;
1739     A->spd                             = flg;
1740     if (flg) {
1741       A->symmetric                     = PETSC_TRUE;
1742       A->structurally_symmetric        = PETSC_TRUE;
1743       A->symmetric_set                 = PETSC_TRUE;
1744       A->structurally_symmetric_set    = PETSC_TRUE;
1745     }
1746     break;
1747   case MAT_SYMMETRIC:
1748     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1749     break;
1750   case MAT_STRUCTURALLY_SYMMETRIC:
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     break;
1753   case MAT_HERMITIAN:
1754     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1755     break;
1756   case MAT_SYMMETRY_ETERNAL:
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     break;
1759   default:
1760     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1761   }
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatGetRow_MPIAIJ"
1767 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1768 {
1769   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1770   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1771   PetscErrorCode ierr;
1772   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1773   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1774   PetscInt       *cmap,*idx_p;
1775 
1776   PetscFunctionBegin;
1777   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1778   mat->getrowactive = PETSC_TRUE;
1779 
1780   if (!mat->rowvalues && (idx || v)) {
1781     /*
1782         allocate enough space to hold information from the longest row.
1783     */
1784     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1785     PetscInt   max = 1,tmp;
1786     for (i=0; i<matin->rmap->n; i++) {
1787       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1788       if (max < tmp) { max = tmp; }
1789     }
1790     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1791   }
1792 
1793   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1794   lrow = row - rstart;
1795 
1796   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1797   if (!v)   {pvA = 0; pvB = 0;}
1798   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1799   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1800   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1801   nztot = nzA + nzB;
1802 
1803   cmap  = mat->garray;
1804   if (v  || idx) {
1805     if (nztot) {
1806       /* Sort by increasing column numbers, assuming A and B already sorted */
1807       PetscInt imark = -1;
1808       if (v) {
1809         *v = v_p = mat->rowvalues;
1810         for (i=0; i<nzB; i++) {
1811           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1812           else break;
1813         }
1814         imark = i;
1815         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1816         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1817       }
1818       if (idx) {
1819         *idx = idx_p = mat->rowindices;
1820         if (imark > -1) {
1821           for (i=0; i<imark; i++) {
1822             idx_p[i] = cmap[cworkB[i]];
1823           }
1824         } else {
1825           for (i=0; i<nzB; i++) {
1826             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1827             else break;
1828           }
1829           imark = i;
1830         }
1831         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1832         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1833       }
1834     } else {
1835       if (idx) *idx = 0;
1836       if (v)   *v   = 0;
1837     }
1838   }
1839   *nz = nztot;
1840   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1841   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1847 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1848 {
1849   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1850 
1851   PetscFunctionBegin;
1852   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1853   aij->getrowactive = PETSC_FALSE;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatNorm_MPIAIJ"
1859 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1860 {
1861   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1862   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1863   PetscErrorCode ierr;
1864   PetscInt       i,j,cstart = mat->cmap->rstart;
1865   PetscReal      sum = 0.0;
1866   MatScalar      *v;
1867 
1868   PetscFunctionBegin;
1869   if (aij->size == 1) {
1870     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1871   } else {
1872     if (type == NORM_FROBENIUS) {
1873       v = amat->a;
1874       for (i=0; i<amat->nz; i++) {
1875 #if defined(PETSC_USE_COMPLEX)
1876         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1877 #else
1878         sum += (*v)*(*v); v++;
1879 #endif
1880       }
1881       v = bmat->a;
1882       for (i=0; i<bmat->nz; i++) {
1883 #if defined(PETSC_USE_COMPLEX)
1884         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1885 #else
1886         sum += (*v)*(*v); v++;
1887 #endif
1888       }
1889       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1890       *norm = sqrt(*norm);
1891     } else if (type == NORM_1) { /* max column norm */
1892       PetscReal *tmp,*tmp2;
1893       PetscInt  *jj,*garray = aij->garray;
1894       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1895       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1896       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1897       *norm = 0.0;
1898       v = amat->a; jj = amat->j;
1899       for (j=0; j<amat->nz; j++) {
1900         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1901       }
1902       v = bmat->a; jj = bmat->j;
1903       for (j=0; j<bmat->nz; j++) {
1904         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1905       }
1906       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1907       for (j=0; j<mat->cmap->N; j++) {
1908         if (tmp2[j] > *norm) *norm = tmp2[j];
1909       }
1910       ierr = PetscFree(tmp);CHKERRQ(ierr);
1911       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1912     } else if (type == NORM_INFINITY) { /* max row norm */
1913       PetscReal ntemp = 0.0;
1914       for (j=0; j<aij->A->rmap->n; j++) {
1915         v = amat->a + amat->i[j];
1916         sum = 0.0;
1917         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1918           sum += PetscAbsScalar(*v); v++;
1919         }
1920         v = bmat->a + bmat->i[j];
1921         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1922           sum += PetscAbsScalar(*v); v++;
1923         }
1924         if (sum > ntemp) ntemp = sum;
1925       }
1926       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1927     } else {
1928       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1929     }
1930   }
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatTranspose_MPIAIJ"
1936 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1937 {
1938   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1939   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1940   PetscErrorCode ierr;
1941   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1942   PetscInt       cstart=A->cmap->rstart,ncol;
1943   Mat            B;
1944   MatScalar      *array;
1945 
1946   PetscFunctionBegin;
1947   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1948 
1949   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1950   ai = Aloc->i; aj = Aloc->j;
1951   bi = Bloc->i; bj = Bloc->j;
1952   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1953     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1954     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1955     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1956     for (i=0; i<ai[ma]; i++){
1957       d_nnz[aj[i]] ++;
1958       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1959     }
1960 
1961     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1962     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1963     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1964     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1965     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1966   } else {
1967     B = *matout;
1968   }
1969 
1970   /* copy over the A part */
1971   array = Aloc->a;
1972   row = A->rmap->rstart;
1973   for (i=0; i<ma; i++) {
1974     ncol = ai[i+1]-ai[i];
1975     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1976     row++; array += ncol; aj += ncol;
1977   }
1978   aj = Aloc->j;
1979   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1980 
1981   /* copy over the B part */
1982   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1983   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1984   array = Bloc->a;
1985   row = A->rmap->rstart;
1986   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1987   cols_tmp = cols;
1988   for (i=0; i<mb; i++) {
1989     ncol = bi[i+1]-bi[i];
1990     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1991     row++; array += ncol; cols_tmp += ncol;
1992   }
1993   ierr = PetscFree(cols);CHKERRQ(ierr);
1994 
1995   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1996   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1997   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1998     *matout = B;
1999   } else {
2000     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2001   }
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 #undef __FUNCT__
2006 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2007 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2008 {
2009   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2010   Mat            a = aij->A,b = aij->B;
2011   PetscErrorCode ierr;
2012   PetscInt       s1,s2,s3;
2013 
2014   PetscFunctionBegin;
2015   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2016   if (rr) {
2017     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2018     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2019     /* Overlap communication with computation. */
2020     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2021   }
2022   if (ll) {
2023     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2024     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2025     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2026   }
2027   /* scale  the diagonal block */
2028   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2029 
2030   if (rr) {
2031     /* Do a scatter end and then right scale the off-diagonal block */
2032     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2033     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2034   }
2035 
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2041 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2042 {
2043   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2044   PetscErrorCode ierr;
2045 
2046   PetscFunctionBegin;
2047   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2048   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2049   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2050   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2055 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2056 {
2057   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2058   PetscErrorCode ierr;
2059 
2060   PetscFunctionBegin;
2061   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatEqual_MPIAIJ"
2067 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2068 {
2069   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2070   Mat            a,b,c,d;
2071   PetscBool      flg;
2072   PetscErrorCode ierr;
2073 
2074   PetscFunctionBegin;
2075   a = matA->A; b = matA->B;
2076   c = matB->A; d = matB->B;
2077 
2078   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2079   if (flg) {
2080     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2081   }
2082   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatCopy_MPIAIJ"
2088 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2089 {
2090   PetscErrorCode ierr;
2091   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2092   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2093 
2094   PetscFunctionBegin;
2095   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2096   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2097     /* because of the column compression in the off-processor part of the matrix a->B,
2098        the number of columns in a->B and b->B may be different, hence we cannot call
2099        the MatCopy() directly on the two parts. If need be, we can provide a more
2100        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2101        then copying the submatrices */
2102     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2103   } else {
2104     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2105     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2106   }
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 #undef __FUNCT__
2111 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2112 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2113 {
2114   PetscErrorCode ierr;
2115 
2116   PetscFunctionBegin;
2117   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatAXPY_MPIAIJ"
2123 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2124 {
2125   PetscErrorCode ierr;
2126   PetscInt       i;
2127   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2128   PetscBLASInt   bnz,one=1;
2129   Mat_SeqAIJ     *x,*y;
2130 
2131   PetscFunctionBegin;
2132   if (str == SAME_NONZERO_PATTERN) {
2133     PetscScalar alpha = a;
2134     x = (Mat_SeqAIJ *)xx->A->data;
2135     y = (Mat_SeqAIJ *)yy->A->data;
2136     bnz = PetscBLASIntCast(x->nz);
2137     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2138     x = (Mat_SeqAIJ *)xx->B->data;
2139     y = (Mat_SeqAIJ *)yy->B->data;
2140     bnz = PetscBLASIntCast(x->nz);
2141     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2142   } else if (str == SUBSET_NONZERO_PATTERN) {
2143     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2144 
2145     x = (Mat_SeqAIJ *)xx->B->data;
2146     y = (Mat_SeqAIJ *)yy->B->data;
2147     if (y->xtoy && y->XtoY != xx->B) {
2148       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2149       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2150     }
2151     if (!y->xtoy) { /* get xtoy */
2152       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2153       y->XtoY = xx->B;
2154       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2155     }
2156     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2157   } else {
2158     Mat B;
2159     PetscInt *nnz_d,*nnz_o;
2160     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2161     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2162     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2163     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2164     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2165     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2166     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2167     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2168     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2169     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2170     ierr = MatHeaderReplace(Y,B);
2171     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2172     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2173   }
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatConjugate_MPIAIJ"
2181 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2182 {
2183 #if defined(PETSC_USE_COMPLEX)
2184   PetscErrorCode ierr;
2185   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2186 
2187   PetscFunctionBegin;
2188   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2189   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2190 #else
2191   PetscFunctionBegin;
2192 #endif
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatRealPart_MPIAIJ"
2198 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2199 {
2200   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2201   PetscErrorCode ierr;
2202 
2203   PetscFunctionBegin;
2204   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2205   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 #undef __FUNCT__
2210 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2211 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2212 {
2213   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2214   PetscErrorCode ierr;
2215 
2216   PetscFunctionBegin;
2217   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2218   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 #ifdef PETSC_HAVE_PBGL
2223 
2224 #include <boost/parallel/mpi/bsp_process_group.hpp>
2225 #include <boost/graph/distributed/ilu_default_graph.hpp>
2226 #include <boost/graph/distributed/ilu_0_block.hpp>
2227 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2228 #include <boost/graph/distributed/petsc/interface.hpp>
2229 #include <boost/multi_array.hpp>
2230 #include <boost/parallel/distributed_property_map->hpp>
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2234 /*
2235   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2236 */
2237 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2238 {
2239   namespace petsc = boost::distributed::petsc;
2240 
2241   namespace graph_dist = boost::graph::distributed;
2242   using boost::graph::distributed::ilu_default::process_group_type;
2243   using boost::graph::ilu_permuted;
2244 
2245   PetscBool       row_identity, col_identity;
2246   PetscContainer  c;
2247   PetscInt        m, n, M, N;
2248   PetscErrorCode  ierr;
2249 
2250   PetscFunctionBegin;
2251   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2252   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2253   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2254   if (!row_identity || !col_identity) {
2255     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2256   }
2257 
2258   process_group_type pg;
2259   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2260   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2261   lgraph_type&   level_graph = *lgraph_p;
2262   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2263 
2264   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2265   ilu_permuted(level_graph);
2266 
2267   /* put together the new matrix */
2268   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2269   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2270   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2271   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2272   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2273   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2275 
2276   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2277   ierr = PetscContainerSetPointer(c, lgraph_p);
2278   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2284 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2285 {
2286   PetscFunctionBegin;
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 #undef __FUNCT__
2291 #define __FUNCT__ "MatSolve_MPIAIJ"
2292 /*
2293   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2294 */
2295 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2296 {
2297   namespace graph_dist = boost::graph::distributed;
2298 
2299   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2300   lgraph_type*   lgraph_p;
2301   PetscContainer c;
2302   PetscErrorCode ierr;
2303 
2304   PetscFunctionBegin;
2305   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2306   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2307   ierr = VecCopy(b, x);CHKERRQ(ierr);
2308 
2309   PetscScalar* array_x;
2310   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2311   PetscInt sx;
2312   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2313 
2314   PetscScalar* array_b;
2315   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2316   PetscInt sb;
2317   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2318 
2319   lgraph_type&   level_graph = *lgraph_p;
2320   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2321 
2322   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2323   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2324                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2325 
2326   typedef boost::iterator_property_map<array_ref_type::iterator,
2327                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2328   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2329                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2330 
2331   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2332 
2333   PetscFunctionReturn(0);
2334 }
2335 #endif
2336 
2337 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2338   PetscInt       nzlocal,nsends,nrecvs;
2339   PetscMPIInt    *send_rank,*recv_rank;
2340   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2341   PetscScalar    *sbuf_a,**rbuf_a;
2342   PetscErrorCode (*MatDestroy)(Mat);
2343 } Mat_Redundant;
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2347 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2348 {
2349   PetscErrorCode       ierr;
2350   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2351   PetscInt             i;
2352 
2353   PetscFunctionBegin;
2354   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2355   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2356   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2357   for (i=0; i<redund->nrecvs; i++){
2358     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2359     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2360   }
2361   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2362   ierr = PetscFree(redund);CHKERRQ(ierr);
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatDestroy_MatRedundant"
2368 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2369 {
2370   PetscErrorCode  ierr;
2371   PetscContainer  container;
2372   Mat_Redundant   *redund=PETSC_NULL;
2373 
2374   PetscFunctionBegin;
2375   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2376   if (container) {
2377     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2378   } else {
2379     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2380   }
2381   A->ops->destroy = redund->MatDestroy;
2382   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2383   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2384   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2390 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2391 {
2392   PetscMPIInt    rank,size;
2393   MPI_Comm       comm=((PetscObject)mat)->comm;
2394   PetscErrorCode ierr;
2395   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2396   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2397   PetscInt       *rowrange=mat->rmap->range;
2398   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2399   Mat            A=aij->A,B=aij->B,C=*matredundant;
2400   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2401   PetscScalar    *sbuf_a;
2402   PetscInt       nzlocal=a->nz+b->nz;
2403   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2404   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2405   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2406   MatScalar      *aworkA,*aworkB;
2407   PetscScalar    *vals;
2408   PetscMPIInt    tag1,tag2,tag3,imdex;
2409   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2410                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2411   MPI_Status     recv_status,*send_status;
2412   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2413   PetscInt       **rbuf_j=PETSC_NULL;
2414   PetscScalar    **rbuf_a=PETSC_NULL;
2415   Mat_Redundant  *redund=PETSC_NULL;
2416   PetscContainer container;
2417 
2418   PetscFunctionBegin;
2419   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2420   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2421 
2422   if (reuse == MAT_REUSE_MATRIX) {
2423     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2424     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2425     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2426     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2427     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2428     if (container) {
2429       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2430     } else {
2431       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2432     }
2433     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2434 
2435     nsends    = redund->nsends;
2436     nrecvs    = redund->nrecvs;
2437     send_rank = redund->send_rank;
2438     recv_rank = redund->recv_rank;
2439     sbuf_nz   = redund->sbuf_nz;
2440     rbuf_nz   = redund->rbuf_nz;
2441     sbuf_j    = redund->sbuf_j;
2442     sbuf_a    = redund->sbuf_a;
2443     rbuf_j    = redund->rbuf_j;
2444     rbuf_a    = redund->rbuf_a;
2445   }
2446 
2447   if (reuse == MAT_INITIAL_MATRIX){
2448     PetscMPIInt  subrank,subsize;
2449     PetscInt     nleftover,np_subcomm;
2450     /* get the destination processors' id send_rank, nsends and nrecvs */
2451     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2452     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2453     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2454     np_subcomm = size/nsubcomm;
2455     nleftover  = size - nsubcomm*np_subcomm;
2456     nsends = 0; nrecvs = 0;
2457     for (i=0; i<size; i++){ /* i=rank*/
2458       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2459         send_rank[nsends] = i; nsends++;
2460         recv_rank[nrecvs++] = i;
2461       }
2462     }
2463     if (rank >= size - nleftover){/* this proc is a leftover processor */
2464       i = size-nleftover-1;
2465       j = 0;
2466       while (j < nsubcomm - nleftover){
2467         send_rank[nsends++] = i;
2468         i--; j++;
2469       }
2470     }
2471 
2472     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2473       for (i=0; i<nleftover; i++){
2474         recv_rank[nrecvs++] = size-nleftover+i;
2475       }
2476     }
2477 
2478     /* allocate sbuf_j, sbuf_a */
2479     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2480     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2481     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2482   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2483 
2484   /* copy mat's local entries into the buffers */
2485   if (reuse == MAT_INITIAL_MATRIX){
2486     rownz_max = 0;
2487     rptr = sbuf_j;
2488     cols = sbuf_j + rend-rstart + 1;
2489     vals = sbuf_a;
2490     rptr[0] = 0;
2491     for (i=0; i<rend-rstart; i++){
2492       row = i + rstart;
2493       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2494       ncols  = nzA + nzB;
2495       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2496       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2497       /* load the column indices for this row into cols */
2498       lwrite = 0;
2499       for (l=0; l<nzB; l++) {
2500         if ((ctmp = bmap[cworkB[l]]) < cstart){
2501           vals[lwrite]   = aworkB[l];
2502           cols[lwrite++] = ctmp;
2503         }
2504       }
2505       for (l=0; l<nzA; l++){
2506         vals[lwrite]   = aworkA[l];
2507         cols[lwrite++] = cstart + cworkA[l];
2508       }
2509       for (l=0; l<nzB; l++) {
2510         if ((ctmp = bmap[cworkB[l]]) >= cend){
2511           vals[lwrite]   = aworkB[l];
2512           cols[lwrite++] = ctmp;
2513         }
2514       }
2515       vals += ncols;
2516       cols += ncols;
2517       rptr[i+1] = rptr[i] + ncols;
2518       if (rownz_max < ncols) rownz_max = ncols;
2519     }
2520     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2521   } else { /* only copy matrix values into sbuf_a */
2522     rptr = sbuf_j;
2523     vals = sbuf_a;
2524     rptr[0] = 0;
2525     for (i=0; i<rend-rstart; i++){
2526       row = i + rstart;
2527       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2528       ncols  = nzA + nzB;
2529       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2530       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2531       lwrite = 0;
2532       for (l=0; l<nzB; l++) {
2533         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2534       }
2535       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2536       for (l=0; l<nzB; l++) {
2537         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2538       }
2539       vals += ncols;
2540       rptr[i+1] = rptr[i] + ncols;
2541     }
2542   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2543 
2544   /* send nzlocal to others, and recv other's nzlocal */
2545   /*--------------------------------------------------*/
2546   if (reuse == MAT_INITIAL_MATRIX){
2547     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2548     s_waits2 = s_waits3 + nsends;
2549     s_waits1 = s_waits2 + nsends;
2550     r_waits1 = s_waits1 + nsends;
2551     r_waits2 = r_waits1 + nrecvs;
2552     r_waits3 = r_waits2 + nrecvs;
2553   } else {
2554     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2555     r_waits3 = s_waits3 + nsends;
2556   }
2557 
2558   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2559   if (reuse == MAT_INITIAL_MATRIX){
2560     /* get new tags to keep the communication clean */
2561     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2562     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2563     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2564 
2565     /* post receives of other's nzlocal */
2566     for (i=0; i<nrecvs; i++){
2567       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2568     }
2569     /* send nzlocal to others */
2570     for (i=0; i<nsends; i++){
2571       sbuf_nz[i] = nzlocal;
2572       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2573     }
2574     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2575     count = nrecvs;
2576     while (count) {
2577       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2578       recv_rank[imdex] = recv_status.MPI_SOURCE;
2579       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2580       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2581 
2582       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2583       rbuf_nz[imdex] += i + 2;
2584       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2585       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2586       count--;
2587     }
2588     /* wait on sends of nzlocal */
2589     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2590     /* send mat->i,j to others, and recv from other's */
2591     /*------------------------------------------------*/
2592     for (i=0; i<nsends; i++){
2593       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2594       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2595     }
2596     /* wait on receives of mat->i,j */
2597     /*------------------------------*/
2598     count = nrecvs;
2599     while (count) {
2600       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2601       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2602       count--;
2603     }
2604     /* wait on sends of mat->i,j */
2605     /*---------------------------*/
2606     if (nsends) {
2607       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2608     }
2609   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2610 
2611   /* post receives, send and receive mat->a */
2612   /*----------------------------------------*/
2613   for (imdex=0; imdex<nrecvs; imdex++) {
2614     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2615   }
2616   for (i=0; i<nsends; i++){
2617     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2618   }
2619   count = nrecvs;
2620   while (count) {
2621     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2622     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2623     count--;
2624   }
2625   if (nsends) {
2626     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2627   }
2628 
2629   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2630 
2631   /* create redundant matrix */
2632   /*-------------------------*/
2633   if (reuse == MAT_INITIAL_MATRIX){
2634     /* compute rownz_max for preallocation */
2635     for (imdex=0; imdex<nrecvs; imdex++){
2636       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2637       rptr = rbuf_j[imdex];
2638       for (i=0; i<j; i++){
2639         ncols = rptr[i+1] - rptr[i];
2640         if (rownz_max < ncols) rownz_max = ncols;
2641       }
2642     }
2643 
2644     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2645     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2646     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2647     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2648     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2649   } else {
2650     C = *matredundant;
2651   }
2652 
2653   /* insert local matrix entries */
2654   rptr = sbuf_j;
2655   cols = sbuf_j + rend-rstart + 1;
2656   vals = sbuf_a;
2657   for (i=0; i<rend-rstart; i++){
2658     row   = i + rstart;
2659     ncols = rptr[i+1] - rptr[i];
2660     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2661     vals += ncols;
2662     cols += ncols;
2663   }
2664   /* insert received matrix entries */
2665   for (imdex=0; imdex<nrecvs; imdex++){
2666     rstart = rowrange[recv_rank[imdex]];
2667     rend   = rowrange[recv_rank[imdex]+1];
2668     rptr = rbuf_j[imdex];
2669     cols = rbuf_j[imdex] + rend-rstart + 1;
2670     vals = rbuf_a[imdex];
2671     for (i=0; i<rend-rstart; i++){
2672       row   = i + rstart;
2673       ncols = rptr[i+1] - rptr[i];
2674       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2675       vals += ncols;
2676       cols += ncols;
2677     }
2678   }
2679   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2680   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2681   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2682   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2683   if (reuse == MAT_INITIAL_MATRIX){
2684     PetscContainer container;
2685     *matredundant = C;
2686     /* create a supporting struct and attach it to C for reuse */
2687     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2688     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2689     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2690     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2691     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2692 
2693     redund->nzlocal = nzlocal;
2694     redund->nsends  = nsends;
2695     redund->nrecvs  = nrecvs;
2696     redund->send_rank = send_rank;
2697     redund->recv_rank = recv_rank;
2698     redund->sbuf_nz = sbuf_nz;
2699     redund->rbuf_nz = rbuf_nz;
2700     redund->sbuf_j  = sbuf_j;
2701     redund->sbuf_a  = sbuf_a;
2702     redund->rbuf_j  = rbuf_j;
2703     redund->rbuf_a  = rbuf_a;
2704 
2705     redund->MatDestroy = C->ops->destroy;
2706     C->ops->destroy    = MatDestroy_MatRedundant;
2707   }
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2713 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2714 {
2715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2716   PetscErrorCode ierr;
2717   PetscInt       i,*idxb = 0;
2718   PetscScalar    *va,*vb;
2719   Vec            vtmp;
2720 
2721   PetscFunctionBegin;
2722   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2723   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2724   if (idx) {
2725     for (i=0; i<A->rmap->n; i++) {
2726       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2727     }
2728   }
2729 
2730   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2731   if (idx) {
2732     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2733   }
2734   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2735   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2736 
2737   for (i=0; i<A->rmap->n; i++){
2738     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2739       va[i] = vb[i];
2740       if (idx) idx[i] = a->garray[idxb[i]];
2741     }
2742   }
2743 
2744   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2745   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2746   ierr = PetscFree(idxb);CHKERRQ(ierr);
2747   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2753 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2754 {
2755   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2756   PetscErrorCode ierr;
2757   PetscInt       i,*idxb = 0;
2758   PetscScalar    *va,*vb;
2759   Vec            vtmp;
2760 
2761   PetscFunctionBegin;
2762   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2763   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2764   if (idx) {
2765     for (i=0; i<A->cmap->n; i++) {
2766       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2767     }
2768   }
2769 
2770   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2771   if (idx) {
2772     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2773   }
2774   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2775   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2776 
2777   for (i=0; i<A->rmap->n; i++){
2778     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2779       va[i] = vb[i];
2780       if (idx) idx[i] = a->garray[idxb[i]];
2781     }
2782   }
2783 
2784   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2785   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2786   ierr = PetscFree(idxb);CHKERRQ(ierr);
2787   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2788   PetscFunctionReturn(0);
2789 }
2790 
2791 #undef __FUNCT__
2792 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2793 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2794 {
2795   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2796   PetscInt       n      = A->rmap->n;
2797   PetscInt       cstart = A->cmap->rstart;
2798   PetscInt      *cmap   = mat->garray;
2799   PetscInt      *diagIdx, *offdiagIdx;
2800   Vec            diagV, offdiagV;
2801   PetscScalar   *a, *diagA, *offdiagA;
2802   PetscInt       r;
2803   PetscErrorCode ierr;
2804 
2805   PetscFunctionBegin;
2806   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2807   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2808   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2809   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2810   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2811   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2812   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2813   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2814   for(r = 0; r < n; ++r) {
2815     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2816       a[r]   = diagA[r];
2817       idx[r] = cstart + diagIdx[r];
2818     } else {
2819       a[r]   = offdiagA[r];
2820       idx[r] = cmap[offdiagIdx[r]];
2821     }
2822   }
2823   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2824   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2825   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2826   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2827   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2828   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2829   PetscFunctionReturn(0);
2830 }
2831 
2832 #undef __FUNCT__
2833 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2834 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2835 {
2836   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2837   PetscInt       n      = A->rmap->n;
2838   PetscInt       cstart = A->cmap->rstart;
2839   PetscInt      *cmap   = mat->garray;
2840   PetscInt      *diagIdx, *offdiagIdx;
2841   Vec            diagV, offdiagV;
2842   PetscScalar   *a, *diagA, *offdiagA;
2843   PetscInt       r;
2844   PetscErrorCode ierr;
2845 
2846   PetscFunctionBegin;
2847   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2848   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2849   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2850   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2851   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2852   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2853   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2854   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2855   for(r = 0; r < n; ++r) {
2856     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2857       a[r]   = diagA[r];
2858       idx[r] = cstart + diagIdx[r];
2859     } else {
2860       a[r]   = offdiagA[r];
2861       idx[r] = cmap[offdiagIdx[r]];
2862     }
2863   }
2864   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2865   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2866   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2867   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2868   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2869   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2870   PetscFunctionReturn(0);
2871 }
2872 
2873 #undef __FUNCT__
2874 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2875 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2876 {
2877   PetscErrorCode ierr;
2878   Mat            *dummy;
2879 
2880   PetscFunctionBegin;
2881   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2882   *newmat = *dummy;
2883   ierr = PetscFree(dummy);CHKERRQ(ierr);
2884   PetscFunctionReturn(0);
2885 }
2886 
2887 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2888 /* -------------------------------------------------------------------*/
2889 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2890        MatGetRow_MPIAIJ,
2891        MatRestoreRow_MPIAIJ,
2892        MatMult_MPIAIJ,
2893 /* 4*/ MatMultAdd_MPIAIJ,
2894        MatMultTranspose_MPIAIJ,
2895        MatMultTransposeAdd_MPIAIJ,
2896 #ifdef PETSC_HAVE_PBGL
2897        MatSolve_MPIAIJ,
2898 #else
2899        0,
2900 #endif
2901        0,
2902        0,
2903 /*10*/ 0,
2904        0,
2905        0,
2906        MatSOR_MPIAIJ,
2907        MatTranspose_MPIAIJ,
2908 /*15*/ MatGetInfo_MPIAIJ,
2909        MatEqual_MPIAIJ,
2910        MatGetDiagonal_MPIAIJ,
2911        MatDiagonalScale_MPIAIJ,
2912        MatNorm_MPIAIJ,
2913 /*20*/ MatAssemblyBegin_MPIAIJ,
2914        MatAssemblyEnd_MPIAIJ,
2915        MatSetOption_MPIAIJ,
2916        MatZeroEntries_MPIAIJ,
2917 /*24*/ MatZeroRows_MPIAIJ,
2918        0,
2919 #ifdef PETSC_HAVE_PBGL
2920        0,
2921 #else
2922        0,
2923 #endif
2924        0,
2925        0,
2926 /*29*/ MatSetUpPreallocation_MPIAIJ,
2927 #ifdef PETSC_HAVE_PBGL
2928        0,
2929 #else
2930        0,
2931 #endif
2932        0,
2933        0,
2934        0,
2935 /*34*/ MatDuplicate_MPIAIJ,
2936        0,
2937        0,
2938        0,
2939        0,
2940 /*39*/ MatAXPY_MPIAIJ,
2941        MatGetSubMatrices_MPIAIJ,
2942        MatIncreaseOverlap_MPIAIJ,
2943        MatGetValues_MPIAIJ,
2944        MatCopy_MPIAIJ,
2945 /*44*/ MatGetRowMax_MPIAIJ,
2946        MatScale_MPIAIJ,
2947        0,
2948        0,
2949        MatZeroRowsColumns_MPIAIJ,
2950 /*49*/ MatSetBlockSize_MPIAIJ,
2951        0,
2952        0,
2953        0,
2954        0,
2955 /*54*/ MatFDColoringCreate_MPIAIJ,
2956        0,
2957        MatSetUnfactored_MPIAIJ,
2958        MatPermute_MPIAIJ,
2959        0,
2960 /*59*/ MatGetSubMatrix_MPIAIJ,
2961        MatDestroy_MPIAIJ,
2962        MatView_MPIAIJ,
2963        0,
2964        0,
2965 /*64*/ 0,
2966        0,
2967        0,
2968        0,
2969        0,
2970 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2971        MatGetRowMinAbs_MPIAIJ,
2972        0,
2973        MatSetColoring_MPIAIJ,
2974 #if defined(PETSC_HAVE_ADIC)
2975        MatSetValuesAdic_MPIAIJ,
2976 #else
2977        0,
2978 #endif
2979        MatSetValuesAdifor_MPIAIJ,
2980 /*75*/ MatFDColoringApply_AIJ,
2981        0,
2982        0,
2983        0,
2984        0,
2985 /*80*/ 0,
2986        0,
2987        0,
2988 /*83*/ MatLoad_MPIAIJ,
2989        0,
2990        0,
2991        0,
2992        0,
2993        0,
2994 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2995        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2996        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2997        MatPtAP_Basic,
2998        MatPtAPSymbolic_MPIAIJ,
2999 /*94*/ MatPtAPNumeric_MPIAIJ,
3000        0,
3001        0,
3002        0,
3003        0,
3004 /*99*/ 0,
3005        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3006        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3007        MatConjugate_MPIAIJ,
3008        0,
3009 /*104*/MatSetValuesRow_MPIAIJ,
3010        MatRealPart_MPIAIJ,
3011        MatImaginaryPart_MPIAIJ,
3012        0,
3013        0,
3014 /*109*/0,
3015        MatGetRedundantMatrix_MPIAIJ,
3016        MatGetRowMin_MPIAIJ,
3017        0,
3018        0,
3019 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3020        0,
3021        0,
3022        0,
3023        0,
3024 /*119*/0,
3025        0,
3026        0,
3027        0,
3028        MatGetMultiProcBlock_MPIAIJ,
3029 /*124*/MatFindNonZeroRows_MPIAIJ,
3030        0,
3031        0,
3032        0,
3033        MatGetSubMatricesParallel_MPIAIJ
3034 };
3035 
3036 /* ----------------------------------------------------------------------------------------*/
3037 
3038 EXTERN_C_BEGIN
3039 #undef __FUNCT__
3040 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3041 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3042 {
3043   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3044   PetscErrorCode ierr;
3045 
3046   PetscFunctionBegin;
3047   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3048   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3049   PetscFunctionReturn(0);
3050 }
3051 EXTERN_C_END
3052 
3053 EXTERN_C_BEGIN
3054 #undef __FUNCT__
3055 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3056 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3057 {
3058   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3059   PetscErrorCode ierr;
3060 
3061   PetscFunctionBegin;
3062   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3063   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3064   PetscFunctionReturn(0);
3065 }
3066 EXTERN_C_END
3067 
3068 EXTERN_C_BEGIN
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3071 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3072 {
3073   Mat_MPIAIJ     *b;
3074   PetscErrorCode ierr;
3075   PetscInt       i;
3076 
3077   PetscFunctionBegin;
3078   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3079   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3080   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3081   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3082 
3083   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3084   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3085   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3086   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3087   if (d_nnz) {
3088     for (i=0; i<B->rmap->n; i++) {
3089       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3090     }
3091   }
3092   if (o_nnz) {
3093     for (i=0; i<B->rmap->n; i++) {
3094       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3095     }
3096   }
3097   b = (Mat_MPIAIJ*)B->data;
3098 
3099   if (!B->preallocated) {
3100     /* Explicitly create 2 MATSEQAIJ matrices. */
3101     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3102     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3103     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3104     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3105     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3106     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3107     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3108     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3109   }
3110 
3111   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3112   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3113   B->preallocated = PETSC_TRUE;
3114   PetscFunctionReturn(0);
3115 }
3116 EXTERN_C_END
3117 
3118 #undef __FUNCT__
3119 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3120 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3121 {
3122   Mat            mat;
3123   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3124   PetscErrorCode ierr;
3125 
3126   PetscFunctionBegin;
3127   *newmat       = 0;
3128   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3129   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3130   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3131   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3132   a    = (Mat_MPIAIJ*)mat->data;
3133 
3134   mat->factortype    = matin->factortype;
3135   mat->rmap->bs      = matin->rmap->bs;
3136   mat->assembled    = PETSC_TRUE;
3137   mat->insertmode   = NOT_SET_VALUES;
3138   mat->preallocated = PETSC_TRUE;
3139 
3140   a->size           = oldmat->size;
3141   a->rank           = oldmat->rank;
3142   a->donotstash     = oldmat->donotstash;
3143   a->roworiented    = oldmat->roworiented;
3144   a->rowindices     = 0;
3145   a->rowvalues      = 0;
3146   a->getrowactive   = PETSC_FALSE;
3147 
3148   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3149   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3150 
3151   if (oldmat->colmap) {
3152 #if defined (PETSC_USE_CTABLE)
3153     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3154 #else
3155     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3156     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3157     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3158 #endif
3159   } else a->colmap = 0;
3160   if (oldmat->garray) {
3161     PetscInt len;
3162     len  = oldmat->B->cmap->n;
3163     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3164     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3165     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3166   } else a->garray = 0;
3167 
3168   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3169   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3170   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3171   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3172   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3173   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3174   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3175   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3176   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3177   *newmat = mat;
3178   PetscFunctionReturn(0);
3179 }
3180 
3181 /*
3182     Allows sending/receiving larger messages then 2 gigabytes in a single call
3183 */
3184 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3185 {
3186   int             ierr;
3187   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3188   PetscInt        i,numchunks;
3189   PetscMPIInt     icnt;
3190 
3191   numchunks = cnt/CHUNKSIZE + 1;
3192   for (i=0; i<numchunks; i++) {
3193     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3194     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
3195     if (type == MPIU_INT) {
3196       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3197     } else if (type == MPIU_SCALAR) {
3198       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3199     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3200   }
3201   return 0;
3202 }
3203 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3204 {
3205   int             ierr;
3206   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3207   MPI_Status      status;
3208   PetscInt        i,numchunks;
3209   PetscMPIInt     icnt;
3210 
3211   numchunks = cnt/CHUNKSIZE + 1;
3212   for (i=0; i<numchunks; i++) {
3213     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3214     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
3215     if (type == MPIU_INT) {
3216       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3217     } else if (type == MPIU_SCALAR) {
3218       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3219     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3220   }
3221   return 0;
3222 }
3223 
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatLoad_MPIAIJ"
3226 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3227 {
3228   PetscScalar    *vals,*svals;
3229   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3230   PetscErrorCode ierr;
3231   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3232   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3233   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3234   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3235   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3236   int            fd;
3237 
3238   PetscFunctionBegin;
3239   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3240   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3241   if (!rank) {
3242     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3243     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3244     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3245   }
3246 
3247   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3248 
3249   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3250   M = header[1]; N = header[2];
3251   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3252   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3253   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3254 
3255   /* If global sizes are set, check if they are consistent with that given in the file */
3256   if (sizesset) {
3257     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3258   }
3259   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3260   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3261 
3262   /* determine ownership of all rows */
3263   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3264   else m = newMat->rmap->n; /* Set by user */
3265 
3266   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3267   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3268 
3269   /* First process needs enough room for process with most rows */
3270   if (!rank) {
3271     mmax       = rowners[1];
3272     for (i=2; i<size; i++) {
3273       mmax = PetscMax(mmax,rowners[i]);
3274     }
3275   } else mmax = m;
3276 
3277   rowners[0] = 0;
3278   for (i=2; i<=size; i++) {
3279     rowners[i] += rowners[i-1];
3280   }
3281   rstart = rowners[rank];
3282   rend   = rowners[rank+1];
3283 
3284   /* distribute row lengths to all processors */
3285   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3286   if (!rank) {
3287     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3288     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3289     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3290     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3291     for (j=0; j<m; j++) {
3292       procsnz[0] += ourlens[j];
3293     }
3294     for (i=1; i<size; i++) {
3295       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3296       /* calculate the number of nonzeros on each processor */
3297       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3298         procsnz[i] += rowlengths[j];
3299       }
3300       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3301     }
3302     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3303   } else {
3304     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3305   }
3306 
3307   if (!rank) {
3308     /* determine max buffer needed and allocate it */
3309     maxnz = 0;
3310     for (i=0; i<size; i++) {
3311       maxnz = PetscMax(maxnz,procsnz[i]);
3312     }
3313     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3314 
3315     /* read in my part of the matrix column indices  */
3316     nz   = procsnz[0];
3317     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3318     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3319 
3320     /* read in every one elses and ship off */
3321     for (i=1; i<size; i++) {
3322       nz     = procsnz[i];
3323       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3324       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3325     }
3326     ierr = PetscFree(cols);CHKERRQ(ierr);
3327   } else {
3328     /* determine buffer space needed for message */
3329     nz = 0;
3330     for (i=0; i<m; i++) {
3331       nz += ourlens[i];
3332     }
3333     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3334 
3335     /* receive message of column indices*/
3336     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3337   }
3338 
3339   /* determine column ownership if matrix is not square */
3340   if (N != M) {
3341     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3342     else n = newMat->cmap->n;
3343     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3344     cstart = cend - n;
3345   } else {
3346     cstart = rstart;
3347     cend   = rend;
3348     n      = cend - cstart;
3349   }
3350 
3351   /* loop over local rows, determining number of off diagonal entries */
3352   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3353   jj = 0;
3354   for (i=0; i<m; i++) {
3355     for (j=0; j<ourlens[i]; j++) {
3356       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3357       jj++;
3358     }
3359   }
3360 
3361   for (i=0; i<m; i++) {
3362     ourlens[i] -= offlens[i];
3363   }
3364   if (!sizesset) {
3365     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3366   }
3367   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3368 
3369   for (i=0; i<m; i++) {
3370     ourlens[i] += offlens[i];
3371   }
3372 
3373   if (!rank) {
3374     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3375 
3376     /* read in my part of the matrix numerical values  */
3377     nz   = procsnz[0];
3378     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3379 
3380     /* insert into matrix */
3381     jj      = rstart;
3382     smycols = mycols;
3383     svals   = vals;
3384     for (i=0; i<m; i++) {
3385       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3386       smycols += ourlens[i];
3387       svals   += ourlens[i];
3388       jj++;
3389     }
3390 
3391     /* read in other processors and ship out */
3392     for (i=1; i<size; i++) {
3393       nz     = procsnz[i];
3394       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3395       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3396     }
3397     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3398   } else {
3399     /* receive numeric values */
3400     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3401 
3402     /* receive message of values*/
3403     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3404 
3405     /* insert into matrix */
3406     jj      = rstart;
3407     smycols = mycols;
3408     svals   = vals;
3409     for (i=0; i<m; i++) {
3410       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3411       smycols += ourlens[i];
3412       svals   += ourlens[i];
3413       jj++;
3414     }
3415   }
3416   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3417   ierr = PetscFree(vals);CHKERRQ(ierr);
3418   ierr = PetscFree(mycols);CHKERRQ(ierr);
3419   ierr = PetscFree(rowners);CHKERRQ(ierr);
3420 
3421   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3422   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3423   PetscFunctionReturn(0);
3424 }
3425 
3426 #undef __FUNCT__
3427 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3428 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3429 {
3430   PetscErrorCode ierr;
3431   IS             iscol_local;
3432   PetscInt       csize;
3433 
3434   PetscFunctionBegin;
3435   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3436   if (call == MAT_REUSE_MATRIX) {
3437     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3438     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3439   } else {
3440     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3441   }
3442   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3443   if (call == MAT_INITIAL_MATRIX) {
3444     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3445     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3446   }
3447   PetscFunctionReturn(0);
3448 }
3449 
3450 #undef __FUNCT__
3451 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3452 /*
3453     Not great since it makes two copies of the submatrix, first an SeqAIJ
3454   in local and then by concatenating the local matrices the end result.
3455   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3456 
3457   Note: This requires a sequential iscol with all indices.
3458 */
3459 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3460 {
3461   PetscErrorCode ierr;
3462   PetscMPIInt    rank,size;
3463   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3464   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3465   Mat            *local,M,Mreuse;
3466   MatScalar      *vwork,*aa;
3467   MPI_Comm       comm = ((PetscObject)mat)->comm;
3468   Mat_SeqAIJ     *aij;
3469 
3470 
3471   PetscFunctionBegin;
3472   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3473   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3474 
3475   if (call ==  MAT_REUSE_MATRIX) {
3476     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3477     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3478     local = &Mreuse;
3479     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3480   } else {
3481     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3482     Mreuse = *local;
3483     ierr   = PetscFree(local);CHKERRQ(ierr);
3484   }
3485 
3486   /*
3487       m - number of local rows
3488       n - number of columns (same on all processors)
3489       rstart - first row in new global matrix generated
3490   */
3491   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3492   if (call == MAT_INITIAL_MATRIX) {
3493     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3494     ii  = aij->i;
3495     jj  = aij->j;
3496 
3497     /*
3498         Determine the number of non-zeros in the diagonal and off-diagonal
3499         portions of the matrix in order to do correct preallocation
3500     */
3501 
3502     /* first get start and end of "diagonal" columns */
3503     if (csize == PETSC_DECIDE) {
3504       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3505       if (mglobal == n) { /* square matrix */
3506 	nlocal = m;
3507       } else {
3508         nlocal = n/size + ((n % size) > rank);
3509       }
3510     } else {
3511       nlocal = csize;
3512     }
3513     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3514     rstart = rend - nlocal;
3515     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3516 
3517     /* next, compute all the lengths */
3518     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3519     olens = dlens + m;
3520     for (i=0; i<m; i++) {
3521       jend = ii[i+1] - ii[i];
3522       olen = 0;
3523       dlen = 0;
3524       for (j=0; j<jend; j++) {
3525         if (*jj < rstart || *jj >= rend) olen++;
3526         else dlen++;
3527         jj++;
3528       }
3529       olens[i] = olen;
3530       dlens[i] = dlen;
3531     }
3532     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3533     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3534     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3535     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3536     ierr = PetscFree(dlens);CHKERRQ(ierr);
3537   } else {
3538     PetscInt ml,nl;
3539 
3540     M = *newmat;
3541     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3542     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3543     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3544     /*
3545          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3546        rather than the slower MatSetValues().
3547     */
3548     M->was_assembled = PETSC_TRUE;
3549     M->assembled     = PETSC_FALSE;
3550   }
3551   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3552   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3553   ii  = aij->i;
3554   jj  = aij->j;
3555   aa  = aij->a;
3556   for (i=0; i<m; i++) {
3557     row   = rstart + i;
3558     nz    = ii[i+1] - ii[i];
3559     cwork = jj;     jj += nz;
3560     vwork = aa;     aa += nz;
3561     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3562   }
3563 
3564   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3565   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3566   *newmat = M;
3567 
3568   /* save submatrix used in processor for next request */
3569   if (call ==  MAT_INITIAL_MATRIX) {
3570     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3571     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3572   }
3573 
3574   PetscFunctionReturn(0);
3575 }
3576 
3577 EXTERN_C_BEGIN
3578 #undef __FUNCT__
3579 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3580 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3581 {
3582   PetscInt       m,cstart, cend,j,nnz,i,d;
3583   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3584   const PetscInt *JJ;
3585   PetscScalar    *values;
3586   PetscErrorCode ierr;
3587 
3588   PetscFunctionBegin;
3589   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3590 
3591   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3592   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3593   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3594   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3595   m      = B->rmap->n;
3596   cstart = B->cmap->rstart;
3597   cend   = B->cmap->rend;
3598   rstart = B->rmap->rstart;
3599 
3600   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3601 
3602 #if defined(PETSC_USE_DEBUGGING)
3603   for (i=0; i<m; i++) {
3604     nnz     = Ii[i+1]- Ii[i];
3605     JJ      = J + Ii[i];
3606     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3607     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3608     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3609   }
3610 #endif
3611 
3612   for (i=0; i<m; i++) {
3613     nnz     = Ii[i+1]- Ii[i];
3614     JJ      = J + Ii[i];
3615     nnz_max = PetscMax(nnz_max,nnz);
3616     d       = 0;
3617     for (j=0; j<nnz; j++) {
3618       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3619     }
3620     d_nnz[i] = d;
3621     o_nnz[i] = nnz - d;
3622   }
3623   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3624   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3625 
3626   if (v) values = (PetscScalar*)v;
3627   else {
3628     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3629     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3630   }
3631 
3632   for (i=0; i<m; i++) {
3633     ii   = i + rstart;
3634     nnz  = Ii[i+1]- Ii[i];
3635     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3636   }
3637   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3639 
3640   if (!v) {
3641     ierr = PetscFree(values);CHKERRQ(ierr);
3642   }
3643   PetscFunctionReturn(0);
3644 }
3645 EXTERN_C_END
3646 
3647 #undef __FUNCT__
3648 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3649 /*@
3650    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3651    (the default parallel PETSc format).
3652 
3653    Collective on MPI_Comm
3654 
3655    Input Parameters:
3656 +  B - the matrix
3657 .  i - the indices into j for the start of each local row (starts with zero)
3658 .  j - the column indices for each local row (starts with zero)
3659 -  v - optional values in the matrix
3660 
3661    Level: developer
3662 
3663    Notes:
3664        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3665      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3666      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3667 
3668        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3669 
3670        The format which is used for the sparse matrix input, is equivalent to a
3671     row-major ordering.. i.e for the following matrix, the input data expected is
3672     as shown:
3673 
3674         1 0 0
3675         2 0 3     P0
3676        -------
3677         4 5 6     P1
3678 
3679      Process0 [P0]: rows_owned=[0,1]
3680         i =  {0,1,3}  [size = nrow+1  = 2+1]
3681         j =  {0,0,2}  [size = nz = 6]
3682         v =  {1,2,3}  [size = nz = 6]
3683 
3684      Process1 [P1]: rows_owned=[2]
3685         i =  {0,3}    [size = nrow+1  = 1+1]
3686         j =  {0,1,2}  [size = nz = 6]
3687         v =  {4,5,6}  [size = nz = 6]
3688 
3689 .keywords: matrix, aij, compressed row, sparse, parallel
3690 
3691 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3692           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3693 @*/
3694 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3695 {
3696   PetscErrorCode ierr;
3697 
3698   PetscFunctionBegin;
3699   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3700   PetscFunctionReturn(0);
3701 }
3702 
3703 #undef __FUNCT__
3704 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3705 /*@C
3706    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3707    (the default parallel PETSc format).  For good matrix assembly performance
3708    the user should preallocate the matrix storage by setting the parameters
3709    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3710    performance can be increased by more than a factor of 50.
3711 
3712    Collective on MPI_Comm
3713 
3714    Input Parameters:
3715 +  A - the matrix
3716 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3717            (same value is used for all local rows)
3718 .  d_nnz - array containing the number of nonzeros in the various rows of the
3719            DIAGONAL portion of the local submatrix (possibly different for each row)
3720            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3721            The size of this array is equal to the number of local rows, i.e 'm'.
3722            You must leave room for the diagonal entry even if it is zero.
3723 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3724            submatrix (same value is used for all local rows).
3725 -  o_nnz - array containing the number of nonzeros in the various rows of the
3726            OFF-DIAGONAL portion of the local submatrix (possibly different for
3727            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3728            structure. The size of this array is equal to the number
3729            of local rows, i.e 'm'.
3730 
3731    If the *_nnz parameter is given then the *_nz parameter is ignored
3732 
3733    The AIJ format (also called the Yale sparse matrix format or
3734    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3735    storage.  The stored row and column indices begin with zero.
3736    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3737 
3738    The parallel matrix is partitioned such that the first m0 rows belong to
3739    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3740    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3741 
3742    The DIAGONAL portion of the local submatrix of a processor can be defined
3743    as the submatrix which is obtained by extraction the part corresponding to
3744    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3745    first row that belongs to the processor, r2 is the last row belonging to
3746    the this processor, and c1-c2 is range of indices of the local part of a
3747    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3748    common case of a square matrix, the row and column ranges are the same and
3749    the DIAGONAL part is also square. The remaining portion of the local
3750    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3751 
3752    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3753 
3754    You can call MatGetInfo() to get information on how effective the preallocation was;
3755    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3756    You can also run with the option -info and look for messages with the string
3757    malloc in them to see if additional memory allocation was needed.
3758 
3759    Example usage:
3760 
3761    Consider the following 8x8 matrix with 34 non-zero values, that is
3762    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3763    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3764    as follows:
3765 
3766 .vb
3767             1  2  0  |  0  3  0  |  0  4
3768     Proc0   0  5  6  |  7  0  0  |  8  0
3769             9  0 10  | 11  0  0  | 12  0
3770     -------------------------------------
3771            13  0 14  | 15 16 17  |  0  0
3772     Proc1   0 18  0  | 19 20 21  |  0  0
3773             0  0  0  | 22 23  0  | 24  0
3774     -------------------------------------
3775     Proc2  25 26 27  |  0  0 28  | 29  0
3776            30  0  0  | 31 32 33  |  0 34
3777 .ve
3778 
3779    This can be represented as a collection of submatrices as:
3780 
3781 .vb
3782       A B C
3783       D E F
3784       G H I
3785 .ve
3786 
3787    Where the submatrices A,B,C are owned by proc0, D,E,F are
3788    owned by proc1, G,H,I are owned by proc2.
3789 
3790    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3791    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3792    The 'M','N' parameters are 8,8, and have the same values on all procs.
3793 
3794    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3795    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3796    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3797    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3798    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3799    matrix, ans [DF] as another SeqAIJ matrix.
3800 
3801    When d_nz, o_nz parameters are specified, d_nz storage elements are
3802    allocated for every row of the local diagonal submatrix, and o_nz
3803    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3804    One way to choose d_nz and o_nz is to use the max nonzerors per local
3805    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3806    In this case, the values of d_nz,o_nz are:
3807 .vb
3808      proc0 : dnz = 2, o_nz = 2
3809      proc1 : dnz = 3, o_nz = 2
3810      proc2 : dnz = 1, o_nz = 4
3811 .ve
3812    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3813    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3814    for proc3. i.e we are using 12+15+10=37 storage locations to store
3815    34 values.
3816 
3817    When d_nnz, o_nnz parameters are specified, the storage is specified
3818    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3819    In the above case the values for d_nnz,o_nnz are:
3820 .vb
3821      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3822      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3823      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3824 .ve
3825    Here the space allocated is sum of all the above values i.e 34, and
3826    hence pre-allocation is perfect.
3827 
3828    Level: intermediate
3829 
3830 .keywords: matrix, aij, compressed row, sparse, parallel
3831 
3832 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3833           MPIAIJ, MatGetInfo()
3834 @*/
3835 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3836 {
3837   PetscErrorCode ierr;
3838 
3839   PetscFunctionBegin;
3840   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3841   PetscFunctionReturn(0);
3842 }
3843 
3844 #undef __FUNCT__
3845 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3846 /*@
3847      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3848          CSR format the local rows.
3849 
3850    Collective on MPI_Comm
3851 
3852    Input Parameters:
3853 +  comm - MPI communicator
3854 .  m - number of local rows (Cannot be PETSC_DECIDE)
3855 .  n - This value should be the same as the local size used in creating the
3856        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3857        calculated if N is given) For square matrices n is almost always m.
3858 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3859 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3860 .   i - row indices
3861 .   j - column indices
3862 -   a - matrix values
3863 
3864    Output Parameter:
3865 .   mat - the matrix
3866 
3867    Level: intermediate
3868 
3869    Notes:
3870        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3871      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3872      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3873 
3874        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3875 
3876        The format which is used for the sparse matrix input, is equivalent to a
3877     row-major ordering.. i.e for the following matrix, the input data expected is
3878     as shown:
3879 
3880         1 0 0
3881         2 0 3     P0
3882        -------
3883         4 5 6     P1
3884 
3885      Process0 [P0]: rows_owned=[0,1]
3886         i =  {0,1,3}  [size = nrow+1  = 2+1]
3887         j =  {0,0,2}  [size = nz = 6]
3888         v =  {1,2,3}  [size = nz = 6]
3889 
3890      Process1 [P1]: rows_owned=[2]
3891         i =  {0,3}    [size = nrow+1  = 1+1]
3892         j =  {0,1,2}  [size = nz = 6]
3893         v =  {4,5,6}  [size = nz = 6]
3894 
3895 .keywords: matrix, aij, compressed row, sparse, parallel
3896 
3897 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3898           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3899 @*/
3900 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3901 {
3902   PetscErrorCode ierr;
3903 
3904  PetscFunctionBegin;
3905   if (i[0]) {
3906     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3907   }
3908   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3909   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3910   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3911   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3912   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3913   PetscFunctionReturn(0);
3914 }
3915 
3916 #undef __FUNCT__
3917 #define __FUNCT__ "MatCreateMPIAIJ"
3918 /*@C
3919    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3920    (the default parallel PETSc format).  For good matrix assembly performance
3921    the user should preallocate the matrix storage by setting the parameters
3922    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3923    performance can be increased by more than a factor of 50.
3924 
3925    Collective on MPI_Comm
3926 
3927    Input Parameters:
3928 +  comm - MPI communicator
3929 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3930            This value should be the same as the local size used in creating the
3931            y vector for the matrix-vector product y = Ax.
3932 .  n - This value should be the same as the local size used in creating the
3933        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3934        calculated if N is given) For square matrices n is almost always m.
3935 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3936 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3937 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3938            (same value is used for all local rows)
3939 .  d_nnz - array containing the number of nonzeros in the various rows of the
3940            DIAGONAL portion of the local submatrix (possibly different for each row)
3941            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3942            The size of this array is equal to the number of local rows, i.e 'm'.
3943            You must leave room for the diagonal entry even if it is zero.
3944 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3945            submatrix (same value is used for all local rows).
3946 -  o_nnz - array containing the number of nonzeros in the various rows of the
3947            OFF-DIAGONAL portion of the local submatrix (possibly different for
3948            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3949            structure. The size of this array is equal to the number
3950            of local rows, i.e 'm'.
3951 
3952    Output Parameter:
3953 .  A - the matrix
3954 
3955    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3956    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3957    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3958 
3959    Notes:
3960    If the *_nnz parameter is given then the *_nz parameter is ignored
3961 
3962    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3963    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3964    storage requirements for this matrix.
3965 
3966    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3967    processor than it must be used on all processors that share the object for
3968    that argument.
3969 
3970    The user MUST specify either the local or global matrix dimensions
3971    (possibly both).
3972 
3973    The parallel matrix is partitioned across processors such that the
3974    first m0 rows belong to process 0, the next m1 rows belong to
3975    process 1, the next m2 rows belong to process 2 etc.. where
3976    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3977    values corresponding to [m x N] submatrix.
3978 
3979    The columns are logically partitioned with the n0 columns belonging
3980    to 0th partition, the next n1 columns belonging to the next
3981    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3982 
3983    The DIAGONAL portion of the local submatrix on any given processor
3984    is the submatrix corresponding to the rows and columns m,n
3985    corresponding to the given processor. i.e diagonal matrix on
3986    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3987    etc. The remaining portion of the local submatrix [m x (N-n)]
3988    constitute the OFF-DIAGONAL portion. The example below better
3989    illustrates this concept.
3990 
3991    For a square global matrix we define each processor's diagonal portion
3992    to be its local rows and the corresponding columns (a square submatrix);
3993    each processor's off-diagonal portion encompasses the remainder of the
3994    local matrix (a rectangular submatrix).
3995 
3996    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3997 
3998    When calling this routine with a single process communicator, a matrix of
3999    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4000    type of communicator, use the construction mechanism:
4001      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4002 
4003    By default, this format uses inodes (identical nodes) when possible.
4004    We search for consecutive rows with the same nonzero structure, thereby
4005    reusing matrix information to achieve increased efficiency.
4006 
4007    Options Database Keys:
4008 +  -mat_no_inode  - Do not use inodes
4009 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4010 -  -mat_aij_oneindex - Internally use indexing starting at 1
4011         rather than 0.  Note that when calling MatSetValues(),
4012         the user still MUST index entries starting at 0!
4013 
4014 
4015    Example usage:
4016 
4017    Consider the following 8x8 matrix with 34 non-zero values, that is
4018    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4019    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4020    as follows:
4021 
4022 .vb
4023             1  2  0  |  0  3  0  |  0  4
4024     Proc0   0  5  6  |  7  0  0  |  8  0
4025             9  0 10  | 11  0  0  | 12  0
4026     -------------------------------------
4027            13  0 14  | 15 16 17  |  0  0
4028     Proc1   0 18  0  | 19 20 21  |  0  0
4029             0  0  0  | 22 23  0  | 24  0
4030     -------------------------------------
4031     Proc2  25 26 27  |  0  0 28  | 29  0
4032            30  0  0  | 31 32 33  |  0 34
4033 .ve
4034 
4035    This can be represented as a collection of submatrices as:
4036 
4037 .vb
4038       A B C
4039       D E F
4040       G H I
4041 .ve
4042 
4043    Where the submatrices A,B,C are owned by proc0, D,E,F are
4044    owned by proc1, G,H,I are owned by proc2.
4045 
4046    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4047    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4048    The 'M','N' parameters are 8,8, and have the same values on all procs.
4049 
4050    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4051    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4052    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4053    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4054    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4055    matrix, ans [DF] as another SeqAIJ matrix.
4056 
4057    When d_nz, o_nz parameters are specified, d_nz storage elements are
4058    allocated for every row of the local diagonal submatrix, and o_nz
4059    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4060    One way to choose d_nz and o_nz is to use the max nonzerors per local
4061    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4062    In this case, the values of d_nz,o_nz are:
4063 .vb
4064      proc0 : dnz = 2, o_nz = 2
4065      proc1 : dnz = 3, o_nz = 2
4066      proc2 : dnz = 1, o_nz = 4
4067 .ve
4068    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4069    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4070    for proc3. i.e we are using 12+15+10=37 storage locations to store
4071    34 values.
4072 
4073    When d_nnz, o_nnz parameters are specified, the storage is specified
4074    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4075    In the above case the values for d_nnz,o_nnz are:
4076 .vb
4077      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4078      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4079      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4080 .ve
4081    Here the space allocated is sum of all the above values i.e 34, and
4082    hence pre-allocation is perfect.
4083 
4084    Level: intermediate
4085 
4086 .keywords: matrix, aij, compressed row, sparse, parallel
4087 
4088 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4089           MPIAIJ, MatCreateMPIAIJWithArrays()
4090 @*/
4091 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4092 {
4093   PetscErrorCode ierr;
4094   PetscMPIInt    size;
4095 
4096   PetscFunctionBegin;
4097   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4098   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4099   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4100   if (size > 1) {
4101     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4102     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4103   } else {
4104     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4105     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4106   }
4107   PetscFunctionReturn(0);
4108 }
4109 
4110 #undef __FUNCT__
4111 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4112 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4113 {
4114   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4115 
4116   PetscFunctionBegin;
4117   *Ad     = a->A;
4118   *Ao     = a->B;
4119   *colmap = a->garray;
4120   PetscFunctionReturn(0);
4121 }
4122 
4123 #undef __FUNCT__
4124 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4125 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4126 {
4127   PetscErrorCode ierr;
4128   PetscInt       i;
4129   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4130 
4131   PetscFunctionBegin;
4132   if (coloring->ctype == IS_COLORING_GLOBAL) {
4133     ISColoringValue *allcolors,*colors;
4134     ISColoring      ocoloring;
4135 
4136     /* set coloring for diagonal portion */
4137     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4138 
4139     /* set coloring for off-diagonal portion */
4140     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4141     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4142     for (i=0; i<a->B->cmap->n; i++) {
4143       colors[i] = allcolors[a->garray[i]];
4144     }
4145     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4146     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4147     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4148     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4149   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4150     ISColoringValue *colors;
4151     PetscInt        *larray;
4152     ISColoring      ocoloring;
4153 
4154     /* set coloring for diagonal portion */
4155     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4156     for (i=0; i<a->A->cmap->n; i++) {
4157       larray[i] = i + A->cmap->rstart;
4158     }
4159     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4160     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4161     for (i=0; i<a->A->cmap->n; i++) {
4162       colors[i] = coloring->colors[larray[i]];
4163     }
4164     ierr = PetscFree(larray);CHKERRQ(ierr);
4165     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4166     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4167     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4168 
4169     /* set coloring for off-diagonal portion */
4170     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4171     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4172     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4173     for (i=0; i<a->B->cmap->n; i++) {
4174       colors[i] = coloring->colors[larray[i]];
4175     }
4176     ierr = PetscFree(larray);CHKERRQ(ierr);
4177     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4178     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4179     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4180   } else {
4181     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4182   }
4183 
4184   PetscFunctionReturn(0);
4185 }
4186 
4187 #if defined(PETSC_HAVE_ADIC)
4188 #undef __FUNCT__
4189 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4190 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4191 {
4192   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4193   PetscErrorCode ierr;
4194 
4195   PetscFunctionBegin;
4196   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4197   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4198   PetscFunctionReturn(0);
4199 }
4200 #endif
4201 
4202 #undef __FUNCT__
4203 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4204 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4205 {
4206   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4207   PetscErrorCode ierr;
4208 
4209   PetscFunctionBegin;
4210   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4211   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4212   PetscFunctionReturn(0);
4213 }
4214 
4215 #undef __FUNCT__
4216 #define __FUNCT__ "MatMerge"
4217 /*@
4218       MatMerge - Creates a single large PETSc matrix by concatinating sequential
4219                  matrices from each processor
4220 
4221     Collective on MPI_Comm
4222 
4223    Input Parameters:
4224 +    comm - the communicators the parallel matrix will live on
4225 .    inmat - the input sequential matrices
4226 .    n - number of local columns (or PETSC_DECIDE)
4227 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4228 
4229    Output Parameter:
4230 .    outmat - the parallel matrix generated
4231 
4232     Level: advanced
4233 
4234    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4235 
4236 @*/
4237 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4238 {
4239   PetscErrorCode ierr;
4240   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4241   PetscInt       *indx;
4242   PetscScalar    *values;
4243 
4244   PetscFunctionBegin;
4245   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4246   if (scall == MAT_INITIAL_MATRIX){
4247     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4248     if (n == PETSC_DECIDE){
4249       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4250     }
4251     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4252     rstart -= m;
4253 
4254     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4255     for (i=0;i<m;i++) {
4256       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4257       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4258       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4259     }
4260     /* This routine will ONLY return MPIAIJ type matrix */
4261     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4262     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4263     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4264     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4265     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4266 
4267   } else if (scall == MAT_REUSE_MATRIX){
4268     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4269   } else {
4270     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4271   }
4272 
4273   for (i=0;i<m;i++) {
4274     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4275     Ii    = i + rstart;
4276     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4277     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4278   }
4279   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4280   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4281   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4282 
4283   PetscFunctionReturn(0);
4284 }
4285 
4286 #undef __FUNCT__
4287 #define __FUNCT__ "MatFileSplit"
4288 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4289 {
4290   PetscErrorCode    ierr;
4291   PetscMPIInt       rank;
4292   PetscInt          m,N,i,rstart,nnz;
4293   size_t            len;
4294   const PetscInt    *indx;
4295   PetscViewer       out;
4296   char              *name;
4297   Mat               B;
4298   const PetscScalar *values;
4299 
4300   PetscFunctionBegin;
4301   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4302   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4303   /* Should this be the type of the diagonal block of A? */
4304   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4305   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4306   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4307   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4308   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4309   for (i=0;i<m;i++) {
4310     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4311     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4312     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4313   }
4314   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4315   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4316 
4317   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4318   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4319   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4320   sprintf(name,"%s.%d",outfile,rank);
4321   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4322   ierr = PetscFree(name);
4323   ierr = MatView(B,out);CHKERRQ(ierr);
4324   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4325   ierr = MatDestroy(B);CHKERRQ(ierr);
4326   PetscFunctionReturn(0);
4327 }
4328 
4329 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4330 #undef __FUNCT__
4331 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4332 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4333 {
4334   PetscErrorCode       ierr;
4335   Mat_Merge_SeqsToMPI  *merge;
4336   PetscContainer       container;
4337 
4338   PetscFunctionBegin;
4339   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4340   if (container) {
4341     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4342     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4343     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4344     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4345     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4346     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4347     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4348     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4349     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4350     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4351     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4352     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4353     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4354     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4355 
4356     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4357     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4358   }
4359   ierr = PetscFree(merge);CHKERRQ(ierr);
4360 
4361   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4362   PetscFunctionReturn(0);
4363 }
4364 
4365 #include <../src/mat/utils/freespace.h>
4366 #include <petscbt.h>
4367 
4368 #undef __FUNCT__
4369 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4370 /*@C
4371       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4372                  matrices from each processor
4373 
4374     Collective on MPI_Comm
4375 
4376    Input Parameters:
4377 +    comm - the communicators the parallel matrix will live on
4378 .    seqmat - the input sequential matrices
4379 .    m - number of local rows (or PETSC_DECIDE)
4380 .    n - number of local columns (or PETSC_DECIDE)
4381 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4382 
4383    Output Parameter:
4384 .    mpimat - the parallel matrix generated
4385 
4386     Level: advanced
4387 
4388    Notes:
4389      The dimensions of the sequential matrix in each processor MUST be the same.
4390      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4391      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4392 @*/
4393 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4394 {
4395   PetscErrorCode       ierr;
4396   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4397   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4398   PetscMPIInt          size,rank,taga,*len_s;
4399   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4400   PetscInt             proc,m;
4401   PetscInt             **buf_ri,**buf_rj;
4402   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4403   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4404   MPI_Request          *s_waits,*r_waits;
4405   MPI_Status           *status;
4406   MatScalar            *aa=a->a;
4407   MatScalar            **abuf_r,*ba_i;
4408   Mat_Merge_SeqsToMPI  *merge;
4409   PetscContainer       container;
4410 
4411   PetscFunctionBegin;
4412   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4413 
4414   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4415   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4416 
4417   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4418   if (container) {
4419     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4420   }
4421   bi     = merge->bi;
4422   bj     = merge->bj;
4423   buf_ri = merge->buf_ri;
4424   buf_rj = merge->buf_rj;
4425 
4426   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4427   owners = merge->rowmap->range;
4428   len_s  = merge->len_s;
4429 
4430   /* send and recv matrix values */
4431   /*-----------------------------*/
4432   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4433   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4434 
4435   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4436   for (proc=0,k=0; proc<size; proc++){
4437     if (!len_s[proc]) continue;
4438     i = owners[proc];
4439     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4440     k++;
4441   }
4442 
4443   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4444   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4445   ierr = PetscFree(status);CHKERRQ(ierr);
4446 
4447   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4448   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4449 
4450   /* insert mat values of mpimat */
4451   /*----------------------------*/
4452   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4453   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4454 
4455   for (k=0; k<merge->nrecv; k++){
4456     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4457     nrows = *(buf_ri_k[k]);
4458     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4459     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4460   }
4461 
4462   /* set values of ba */
4463   m = merge->rowmap->n;
4464   for (i=0; i<m; i++) {
4465     arow = owners[rank] + i;
4466     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4467     bnzi = bi[i+1] - bi[i];
4468     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4469 
4470     /* add local non-zero vals of this proc's seqmat into ba */
4471     anzi = ai[arow+1] - ai[arow];
4472     aj   = a->j + ai[arow];
4473     aa   = a->a + ai[arow];
4474     nextaj = 0;
4475     for (j=0; nextaj<anzi; j++){
4476       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4477         ba_i[j] += aa[nextaj++];
4478       }
4479     }
4480 
4481     /* add received vals into ba */
4482     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4483       /* i-th row */
4484       if (i == *nextrow[k]) {
4485         anzi = *(nextai[k]+1) - *nextai[k];
4486         aj   = buf_rj[k] + *(nextai[k]);
4487         aa   = abuf_r[k] + *(nextai[k]);
4488         nextaj = 0;
4489         for (j=0; nextaj<anzi; j++){
4490           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4491             ba_i[j] += aa[nextaj++];
4492           }
4493         }
4494         nextrow[k]++; nextai[k]++;
4495       }
4496     }
4497     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4498   }
4499   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4500   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4501 
4502   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4503   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4504   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4505   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4506   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4507   PetscFunctionReturn(0);
4508 }
4509 
4510 #undef __FUNCT__
4511 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4512 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4513 {
4514   PetscErrorCode       ierr;
4515   Mat                  B_mpi;
4516   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4517   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4518   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4519   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4520   PetscInt             len,proc,*dnz,*onz;
4521   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4522   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4523   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4524   MPI_Status           *status;
4525   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4526   PetscBT              lnkbt;
4527   Mat_Merge_SeqsToMPI  *merge;
4528   PetscContainer       container;
4529 
4530   PetscFunctionBegin;
4531   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4532 
4533   /* make sure it is a PETSc comm */
4534   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4535   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4536   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4537 
4538   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4539   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4540 
4541   /* determine row ownership */
4542   /*---------------------------------------------------------*/
4543   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4544   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4545   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4546   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4547   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4548   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4549   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4550 
4551   m      = merge->rowmap->n;
4552   M      = merge->rowmap->N;
4553   owners = merge->rowmap->range;
4554 
4555   /* determine the number of messages to send, their lengths */
4556   /*---------------------------------------------------------*/
4557   len_s  = merge->len_s;
4558 
4559   len = 0;  /* length of buf_si[] */
4560   merge->nsend = 0;
4561   for (proc=0; proc<size; proc++){
4562     len_si[proc] = 0;
4563     if (proc == rank){
4564       len_s[proc] = 0;
4565     } else {
4566       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4567       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4568     }
4569     if (len_s[proc]) {
4570       merge->nsend++;
4571       nrows = 0;
4572       for (i=owners[proc]; i<owners[proc+1]; i++){
4573         if (ai[i+1] > ai[i]) nrows++;
4574       }
4575       len_si[proc] = 2*(nrows+1);
4576       len += len_si[proc];
4577     }
4578   }
4579 
4580   /* determine the number and length of messages to receive for ij-structure */
4581   /*-------------------------------------------------------------------------*/
4582   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4583   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4584 
4585   /* post the Irecv of j-structure */
4586   /*-------------------------------*/
4587   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4588   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4589 
4590   /* post the Isend of j-structure */
4591   /*--------------------------------*/
4592   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4593 
4594   for (proc=0, k=0; proc<size; proc++){
4595     if (!len_s[proc]) continue;
4596     i = owners[proc];
4597     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4598     k++;
4599   }
4600 
4601   /* receives and sends of j-structure are complete */
4602   /*------------------------------------------------*/
4603   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4604   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4605 
4606   /* send and recv i-structure */
4607   /*---------------------------*/
4608   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4609   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4610 
4611   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4612   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4613   for (proc=0,k=0; proc<size; proc++){
4614     if (!len_s[proc]) continue;
4615     /* form outgoing message for i-structure:
4616          buf_si[0]:                 nrows to be sent
4617                [1:nrows]:           row index (global)
4618                [nrows+1:2*nrows+1]: i-structure index
4619     */
4620     /*-------------------------------------------*/
4621     nrows = len_si[proc]/2 - 1;
4622     buf_si_i    = buf_si + nrows+1;
4623     buf_si[0]   = nrows;
4624     buf_si_i[0] = 0;
4625     nrows = 0;
4626     for (i=owners[proc]; i<owners[proc+1]; i++){
4627       anzi = ai[i+1] - ai[i];
4628       if (anzi) {
4629         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4630         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4631         nrows++;
4632       }
4633     }
4634     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4635     k++;
4636     buf_si += len_si[proc];
4637   }
4638 
4639   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4640   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4641 
4642   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4643   for (i=0; i<merge->nrecv; i++){
4644     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4645   }
4646 
4647   ierr = PetscFree(len_si);CHKERRQ(ierr);
4648   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4649   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4650   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4651   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4652   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4653   ierr = PetscFree(status);CHKERRQ(ierr);
4654 
4655   /* compute a local seq matrix in each processor */
4656   /*----------------------------------------------*/
4657   /* allocate bi array and free space for accumulating nonzero column info */
4658   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4659   bi[0] = 0;
4660 
4661   /* create and initialize a linked list */
4662   nlnk = N+1;
4663   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4664 
4665   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4666   len = 0;
4667   len  = ai[owners[rank+1]] - ai[owners[rank]];
4668   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4669   current_space = free_space;
4670 
4671   /* determine symbolic info for each local row */
4672   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4673 
4674   for (k=0; k<merge->nrecv; k++){
4675     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4676     nrows = *buf_ri_k[k];
4677     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4678     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4679   }
4680 
4681   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4682   len = 0;
4683   for (i=0;i<m;i++) {
4684     bnzi   = 0;
4685     /* add local non-zero cols of this proc's seqmat into lnk */
4686     arow   = owners[rank] + i;
4687     anzi   = ai[arow+1] - ai[arow];
4688     aj     = a->j + ai[arow];
4689     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4690     bnzi += nlnk;
4691     /* add received col data into lnk */
4692     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4693       if (i == *nextrow[k]) { /* i-th row */
4694         anzi = *(nextai[k]+1) - *nextai[k];
4695         aj   = buf_rj[k] + *nextai[k];
4696         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4697         bnzi += nlnk;
4698         nextrow[k]++; nextai[k]++;
4699       }
4700     }
4701     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4702 
4703     /* if free space is not available, make more free space */
4704     if (current_space->local_remaining<bnzi) {
4705       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4706       nspacedouble++;
4707     }
4708     /* copy data into free space, then initialize lnk */
4709     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4710     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4711 
4712     current_space->array           += bnzi;
4713     current_space->local_used      += bnzi;
4714     current_space->local_remaining -= bnzi;
4715 
4716     bi[i+1] = bi[i] + bnzi;
4717   }
4718 
4719   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4720 
4721   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4722   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4723   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4724 
4725   /* create symbolic parallel matrix B_mpi */
4726   /*---------------------------------------*/
4727   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4728   if (n==PETSC_DECIDE) {
4729     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4730   } else {
4731     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4732   }
4733   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4734   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4735   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4736 
4737   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4738   B_mpi->assembled     = PETSC_FALSE;
4739   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4740   merge->bi            = bi;
4741   merge->bj            = bj;
4742   merge->buf_ri        = buf_ri;
4743   merge->buf_rj        = buf_rj;
4744   merge->coi           = PETSC_NULL;
4745   merge->coj           = PETSC_NULL;
4746   merge->owners_co     = PETSC_NULL;
4747 
4748   /* attach the supporting struct to B_mpi for reuse */
4749   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4750   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4751   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4752   *mpimat = B_mpi;
4753 
4754   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4755   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4756   PetscFunctionReturn(0);
4757 }
4758 
4759 #undef __FUNCT__
4760 #define __FUNCT__ "MatMerge_SeqsToMPI"
4761 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4762 {
4763   PetscErrorCode   ierr;
4764 
4765   PetscFunctionBegin;
4766   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4767   if (scall == MAT_INITIAL_MATRIX){
4768     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4769   }
4770   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4771   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4772   PetscFunctionReturn(0);
4773 }
4774 
4775 #undef __FUNCT__
4776 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4777 /*@
4778      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4779           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4780           with MatGetSize()
4781 
4782     Not Collective
4783 
4784    Input Parameters:
4785 +    A - the matrix
4786 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4787 
4788    Output Parameter:
4789 .    A_loc - the local sequential matrix generated
4790 
4791     Level: developer
4792 
4793 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4794 
4795 @*/
4796 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4797 {
4798   PetscErrorCode  ierr;
4799   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4800   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4801   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4802   MatScalar       *aa=a->a,*ba=b->a,*cam;
4803   PetscScalar     *ca;
4804   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4805   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4806   PetscBool       match;
4807 
4808   PetscFunctionBegin;
4809   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4810   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4811   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4812   if (scall == MAT_INITIAL_MATRIX){
4813     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4814     ci[0] = 0;
4815     for (i=0; i<am; i++){
4816       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4817     }
4818     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4819     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4820     k = 0;
4821     for (i=0; i<am; i++) {
4822       ncols_o = bi[i+1] - bi[i];
4823       ncols_d = ai[i+1] - ai[i];
4824       /* off-diagonal portion of A */
4825       for (jo=0; jo<ncols_o; jo++) {
4826         col = cmap[*bj];
4827         if (col >= cstart) break;
4828         cj[k]   = col; bj++;
4829         ca[k++] = *ba++;
4830       }
4831       /* diagonal portion of A */
4832       for (j=0; j<ncols_d; j++) {
4833         cj[k]   = cstart + *aj++;
4834         ca[k++] = *aa++;
4835       }
4836       /* off-diagonal portion of A */
4837       for (j=jo; j<ncols_o; j++) {
4838         cj[k]   = cmap[*bj++];
4839         ca[k++] = *ba++;
4840       }
4841     }
4842     /* put together the new matrix */
4843     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4844     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4845     /* Since these are PETSc arrays, change flags to free them as necessary. */
4846     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4847     mat->free_a  = PETSC_TRUE;
4848     mat->free_ij = PETSC_TRUE;
4849     mat->nonew   = 0;
4850   } else if (scall == MAT_REUSE_MATRIX){
4851     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4852     ci = mat->i; cj = mat->j; cam = mat->a;
4853     for (i=0; i<am; i++) {
4854       /* off-diagonal portion of A */
4855       ncols_o = bi[i+1] - bi[i];
4856       for (jo=0; jo<ncols_o; jo++) {
4857         col = cmap[*bj];
4858         if (col >= cstart) break;
4859         *cam++ = *ba++; bj++;
4860       }
4861       /* diagonal portion of A */
4862       ncols_d = ai[i+1] - ai[i];
4863       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4864       /* off-diagonal portion of A */
4865       for (j=jo; j<ncols_o; j++) {
4866         *cam++ = *ba++; bj++;
4867       }
4868     }
4869   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4870   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4871   PetscFunctionReturn(0);
4872 }
4873 
4874 #undef __FUNCT__
4875 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4876 /*@C
4877      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4878 
4879     Not Collective
4880 
4881    Input Parameters:
4882 +    A - the matrix
4883 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4884 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4885 
4886    Output Parameter:
4887 .    A_loc - the local sequential matrix generated
4888 
4889     Level: developer
4890 
4891 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4892 
4893 @*/
4894 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4895 {
4896   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4897   PetscErrorCode    ierr;
4898   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4899   IS                isrowa,iscola;
4900   Mat               *aloc;
4901   PetscBool       match;
4902 
4903   PetscFunctionBegin;
4904   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4905   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4906   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4907   if (!row){
4908     start = A->rmap->rstart; end = A->rmap->rend;
4909     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4910   } else {
4911     isrowa = *row;
4912   }
4913   if (!col){
4914     start = A->cmap->rstart;
4915     cmap  = a->garray;
4916     nzA   = a->A->cmap->n;
4917     nzB   = a->B->cmap->n;
4918     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4919     ncols = 0;
4920     for (i=0; i<nzB; i++) {
4921       if (cmap[i] < start) idx[ncols++] = cmap[i];
4922       else break;
4923     }
4924     imark = i;
4925     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4926     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4927     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4928   } else {
4929     iscola = *col;
4930   }
4931   if (scall != MAT_INITIAL_MATRIX){
4932     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4933     aloc[0] = *A_loc;
4934   }
4935   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4936   *A_loc = aloc[0];
4937   ierr = PetscFree(aloc);CHKERRQ(ierr);
4938   if (!row){
4939     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4940   }
4941   if (!col){
4942     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4943   }
4944   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4945   PetscFunctionReturn(0);
4946 }
4947 
4948 #undef __FUNCT__
4949 #define __FUNCT__ "MatGetBrowsOfAcols"
4950 /*@C
4951     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4952 
4953     Collective on Mat
4954 
4955    Input Parameters:
4956 +    A,B - the matrices in mpiaij format
4957 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4958 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4959 
4960    Output Parameter:
4961 +    rowb, colb - index sets of rows and columns of B to extract
4962 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4963 -    B_seq - the sequential matrix generated
4964 
4965     Level: developer
4966 
4967 @*/
4968 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4969 {
4970   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4971   PetscErrorCode    ierr;
4972   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4973   IS                isrowb,iscolb;
4974   Mat               *bseq;
4975 
4976   PetscFunctionBegin;
4977   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4978     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4979   }
4980   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4981 
4982   if (scall == MAT_INITIAL_MATRIX){
4983     start = A->cmap->rstart;
4984     cmap  = a->garray;
4985     nzA   = a->A->cmap->n;
4986     nzB   = a->B->cmap->n;
4987     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4988     ncols = 0;
4989     for (i=0; i<nzB; i++) {  /* row < local row index */
4990       if (cmap[i] < start) idx[ncols++] = cmap[i];
4991       else break;
4992     }
4993     imark = i;
4994     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4995     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4996     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4997     *brstart = imark;
4998     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4999   } else {
5000     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5001     isrowb = *rowb; iscolb = *colb;
5002     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5003     bseq[0] = *B_seq;
5004   }
5005   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5006   *B_seq = bseq[0];
5007   ierr = PetscFree(bseq);CHKERRQ(ierr);
5008   if (!rowb){
5009     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
5010   } else {
5011     *rowb = isrowb;
5012   }
5013   if (!colb){
5014     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
5015   } else {
5016     *colb = iscolb;
5017   }
5018   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5019   PetscFunctionReturn(0);
5020 }
5021 
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatGetBrowsOfAoCols"
5024 /*@C
5025     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5026     of the OFF-DIAGONAL portion of local A
5027 
5028     Collective on Mat
5029 
5030    Input Parameters:
5031 +    A,B - the matrices in mpiaij format
5032 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5033 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5034 .    startsj_r - similar to startsj for receives
5035 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5036 
5037    Output Parameter:
5038 +    B_oth - the sequential matrix generated
5039 
5040     Level: developer
5041 
5042 @*/
5043 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5044 {
5045   VecScatter_MPI_General *gen_to,*gen_from;
5046   PetscErrorCode         ierr;
5047   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5048   Mat_SeqAIJ             *b_oth;
5049   VecScatter             ctx=a->Mvctx;
5050   MPI_Comm               comm=((PetscObject)ctx)->comm;
5051   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5052   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5053   PetscScalar            *rvalues,*svalues;
5054   MatScalar              *b_otha,*bufa,*bufA;
5055   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5056   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5057   MPI_Status             *sstatus,rstatus;
5058   PetscMPIInt            jj;
5059   PetscInt               *cols,sbs,rbs;
5060   PetscScalar            *vals;
5061 
5062   PetscFunctionBegin;
5063   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5064     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5065   }
5066   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5067   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5068 
5069   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5070   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5071   rvalues  = gen_from->values; /* holds the length of receiving row */
5072   svalues  = gen_to->values;   /* holds the length of sending row */
5073   nrecvs   = gen_from->n;
5074   nsends   = gen_to->n;
5075 
5076   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5077   srow     = gen_to->indices;   /* local row index to be sent */
5078   sstarts  = gen_to->starts;
5079   sprocs   = gen_to->procs;
5080   sstatus  = gen_to->sstatus;
5081   sbs      = gen_to->bs;
5082   rstarts  = gen_from->starts;
5083   rprocs   = gen_from->procs;
5084   rbs      = gen_from->bs;
5085 
5086   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5087   if (scall == MAT_INITIAL_MATRIX){
5088     /* i-array */
5089     /*---------*/
5090     /*  post receives */
5091     for (i=0; i<nrecvs; i++){
5092       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5093       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5094       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5095     }
5096 
5097     /* pack the outgoing message */
5098     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5099     sstartsj[0] = 0;  rstartsj[0] = 0;
5100     len = 0; /* total length of j or a array to be sent */
5101     k = 0;
5102     for (i=0; i<nsends; i++){
5103       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5104       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5105       for (j=0; j<nrows; j++) {
5106         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5107         for (l=0; l<sbs; l++){
5108           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5109           rowlen[j*sbs+l] = ncols;
5110           len += ncols;
5111           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5112         }
5113         k++;
5114       }
5115       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5116       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5117     }
5118     /* recvs and sends of i-array are completed */
5119     i = nrecvs;
5120     while (i--) {
5121       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5122     }
5123     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5124 
5125     /* allocate buffers for sending j and a arrays */
5126     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5127     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5128 
5129     /* create i-array of B_oth */
5130     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5131     b_othi[0] = 0;
5132     len = 0; /* total length of j or a array to be received */
5133     k = 0;
5134     for (i=0; i<nrecvs; i++){
5135       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5136       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5137       for (j=0; j<nrows; j++) {
5138         b_othi[k+1] = b_othi[k] + rowlen[j];
5139         len += rowlen[j]; k++;
5140       }
5141       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5142     }
5143 
5144     /* allocate space for j and a arrrays of B_oth */
5145     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5146     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5147 
5148     /* j-array */
5149     /*---------*/
5150     /*  post receives of j-array */
5151     for (i=0; i<nrecvs; i++){
5152       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5153       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5154     }
5155 
5156     /* pack the outgoing message j-array */
5157     k = 0;
5158     for (i=0; i<nsends; i++){
5159       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5160       bufJ = bufj+sstartsj[i];
5161       for (j=0; j<nrows; j++) {
5162         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5163         for (ll=0; ll<sbs; ll++){
5164           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5165           for (l=0; l<ncols; l++){
5166             *bufJ++ = cols[l];
5167           }
5168           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5169         }
5170       }
5171       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5172     }
5173 
5174     /* recvs and sends of j-array are completed */
5175     i = nrecvs;
5176     while (i--) {
5177       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5178     }
5179     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5180   } else if (scall == MAT_REUSE_MATRIX){
5181     sstartsj = *startsj;
5182     rstartsj = *startsj_r;
5183     bufa     = *bufa_ptr;
5184     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5185     b_otha   = b_oth->a;
5186   } else {
5187     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5188   }
5189 
5190   /* a-array */
5191   /*---------*/
5192   /*  post receives of a-array */
5193   for (i=0; i<nrecvs; i++){
5194     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5195     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5196   }
5197 
5198   /* pack the outgoing message a-array */
5199   k = 0;
5200   for (i=0; i<nsends; i++){
5201     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5202     bufA = bufa+sstartsj[i];
5203     for (j=0; j<nrows; j++) {
5204       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5205       for (ll=0; ll<sbs; ll++){
5206         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5207         for (l=0; l<ncols; l++){
5208           *bufA++ = vals[l];
5209         }
5210         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5211       }
5212     }
5213     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5214   }
5215   /* recvs and sends of a-array are completed */
5216   i = nrecvs;
5217   while (i--) {
5218     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5219   }
5220   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5221   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5222 
5223   if (scall == MAT_INITIAL_MATRIX){
5224     /* put together the new matrix */
5225     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5226 
5227     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5228     /* Since these are PETSc arrays, change flags to free them as necessary. */
5229     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5230     b_oth->free_a  = PETSC_TRUE;
5231     b_oth->free_ij = PETSC_TRUE;
5232     b_oth->nonew   = 0;
5233 
5234     ierr = PetscFree(bufj);CHKERRQ(ierr);
5235     if (!startsj || !bufa_ptr){
5236       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5237       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5238     } else {
5239       *startsj   = sstartsj;
5240       *startsj_r = rstartsj;
5241       *bufa_ptr  = bufa;
5242     }
5243   }
5244   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5245   PetscFunctionReturn(0);
5246 }
5247 
5248 #undef __FUNCT__
5249 #define __FUNCT__ "MatGetCommunicationStructs"
5250 /*@C
5251   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5252 
5253   Not Collective
5254 
5255   Input Parameters:
5256 . A - The matrix in mpiaij format
5257 
5258   Output Parameter:
5259 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5260 . colmap - A map from global column index to local index into lvec
5261 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5262 
5263   Level: developer
5264 
5265 @*/
5266 #if defined (PETSC_USE_CTABLE)
5267 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5268 #else
5269 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5270 #endif
5271 {
5272   Mat_MPIAIJ *a;
5273 
5274   PetscFunctionBegin;
5275   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5276   PetscValidPointer(lvec, 2);
5277   PetscValidPointer(colmap, 3);
5278   PetscValidPointer(multScatter, 4);
5279   a = (Mat_MPIAIJ *) A->data;
5280   if (lvec) *lvec = a->lvec;
5281   if (colmap) *colmap = a->colmap;
5282   if (multScatter) *multScatter = a->Mvctx;
5283   PetscFunctionReturn(0);
5284 }
5285 
5286 EXTERN_C_BEGIN
5287 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5288 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5289 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5290 EXTERN_C_END
5291 
5292 #undef __FUNCT__
5293 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5294 /*
5295     Computes (B'*A')' since computing B*A directly is untenable
5296 
5297                n                       p                          p
5298         (              )       (              )         (                  )
5299       m (      A       )  *  n (       B      )   =   m (         C        )
5300         (              )       (              )         (                  )
5301 
5302 */
5303 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5304 {
5305   PetscErrorCode     ierr;
5306   Mat                At,Bt,Ct;
5307 
5308   PetscFunctionBegin;
5309   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5310   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5311   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5312   ierr = MatDestroy(At);CHKERRQ(ierr);
5313   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5314   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5315   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5316   PetscFunctionReturn(0);
5317 }
5318 
5319 #undef __FUNCT__
5320 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5321 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5322 {
5323   PetscErrorCode ierr;
5324   PetscInt       m=A->rmap->n,n=B->cmap->n;
5325   Mat            Cmat;
5326 
5327   PetscFunctionBegin;
5328   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5329   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5330   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5331   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5332   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5333   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5334   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5335   *C   = Cmat;
5336   PetscFunctionReturn(0);
5337 }
5338 
5339 /* ----------------------------------------------------------------*/
5340 #undef __FUNCT__
5341 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5342 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5343 {
5344   PetscErrorCode ierr;
5345 
5346   PetscFunctionBegin;
5347   if (scall == MAT_INITIAL_MATRIX){
5348     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5349   }
5350   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5351   PetscFunctionReturn(0);
5352 }
5353 
5354 EXTERN_C_BEGIN
5355 #if defined(PETSC_HAVE_MUMPS)
5356 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5357 #endif
5358 #if defined(PETSC_HAVE_PASTIX)
5359 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5360 #endif
5361 #if defined(PETSC_HAVE_SUPERLU_DIST)
5362 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5363 #endif
5364 #if defined(PETSC_HAVE_SPOOLES)
5365 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5366 #endif
5367 EXTERN_C_END
5368 
5369 /*MC
5370    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5371 
5372    Options Database Keys:
5373 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5374 
5375   Level: beginner
5376 
5377 .seealso: MatCreateMPIAIJ()
5378 M*/
5379 
5380 EXTERN_C_BEGIN
5381 #undef __FUNCT__
5382 #define __FUNCT__ "MatCreate_MPIAIJ"
5383 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5384 {
5385   Mat_MPIAIJ     *b;
5386   PetscErrorCode ierr;
5387   PetscMPIInt    size;
5388 
5389   PetscFunctionBegin;
5390   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5391 
5392   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5393   B->data         = (void*)b;
5394   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5395   B->rmap->bs     = 1;
5396   B->assembled    = PETSC_FALSE;
5397 
5398   B->insertmode   = NOT_SET_VALUES;
5399   b->size         = size;
5400   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5401 
5402   /* build cache for off array entries formed */
5403   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5404   b->donotstash  = PETSC_FALSE;
5405   b->colmap      = 0;
5406   b->garray      = 0;
5407   b->roworiented = PETSC_TRUE;
5408 
5409   /* stuff used for matrix vector multiply */
5410   b->lvec      = PETSC_NULL;
5411   b->Mvctx     = PETSC_NULL;
5412 
5413   /* stuff for MatGetRow() */
5414   b->rowindices   = 0;
5415   b->rowvalues    = 0;
5416   b->getrowactive = PETSC_FALSE;
5417 
5418 #if defined(PETSC_HAVE_SPOOLES)
5419   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5420                                      "MatGetFactor_mpiaij_spooles",
5421                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5422 #endif
5423 #if defined(PETSC_HAVE_MUMPS)
5424   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5425                                      "MatGetFactor_aij_mumps",
5426                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5427 #endif
5428 #if defined(PETSC_HAVE_PASTIX)
5429   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5430 					   "MatGetFactor_mpiaij_pastix",
5431 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5432 #endif
5433 #if defined(PETSC_HAVE_SUPERLU_DIST)
5434   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5435                                      "MatGetFactor_mpiaij_superlu_dist",
5436                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5437 #endif
5438   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5439                                      "MatStoreValues_MPIAIJ",
5440                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5441   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5442                                      "MatRetrieveValues_MPIAIJ",
5443                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5444   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5445 				     "MatGetDiagonalBlock_MPIAIJ",
5446                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5447   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5448 				     "MatIsTranspose_MPIAIJ",
5449 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5450   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5451 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5452 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5453   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5454 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5455 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5456   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5457 				     "MatDiagonalScaleLocal_MPIAIJ",
5458 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5459   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5460                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5461                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5462   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5463                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5464                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5465   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5466                                      "MatConvert_MPIAIJ_MPISBAIJ",
5467                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5468   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5469                                      "MatMatMult_MPIDense_MPIAIJ",
5470                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5471   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5472                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5473                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5474   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5475                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5476                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5477   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5478   PetscFunctionReturn(0);
5479 }
5480 EXTERN_C_END
5481 
5482 #undef __FUNCT__
5483 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5484 /*@
5485      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5486          and "off-diagonal" part of the matrix in CSR format.
5487 
5488    Collective on MPI_Comm
5489 
5490    Input Parameters:
5491 +  comm - MPI communicator
5492 .  m - number of local rows (Cannot be PETSC_DECIDE)
5493 .  n - This value should be the same as the local size used in creating the
5494        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5495        calculated if N is given) For square matrices n is almost always m.
5496 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5497 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5498 .   i - row indices for "diagonal" portion of matrix
5499 .   j - column indices
5500 .   a - matrix values
5501 .   oi - row indices for "off-diagonal" portion of matrix
5502 .   oj - column indices
5503 -   oa - matrix values
5504 
5505    Output Parameter:
5506 .   mat - the matrix
5507 
5508    Level: advanced
5509 
5510    Notes:
5511        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5512        must free the arrays once the matrix has been destroyed and not before.
5513 
5514        The i and j indices are 0 based
5515 
5516        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5517 
5518        This sets local rows and cannot be used to set off-processor values.
5519 
5520        You cannot later use MatSetValues() to change values in this matrix.
5521 
5522 .keywords: matrix, aij, compressed row, sparse, parallel
5523 
5524 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5525           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5526 @*/
5527 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5528 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5529 {
5530   PetscErrorCode ierr;
5531   Mat_MPIAIJ     *maij;
5532 
5533  PetscFunctionBegin;
5534   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5535   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5536   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5537   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5538   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5539   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5540   maij = (Mat_MPIAIJ*) (*mat)->data;
5541   maij->donotstash     = PETSC_TRUE;
5542   (*mat)->preallocated = PETSC_TRUE;
5543 
5544   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5545   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5546   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5547   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5548 
5549   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5550   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5551 
5552   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5553   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5554   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5555   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5556 
5557   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5558   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5559   PetscFunctionReturn(0);
5560 }
5561 
5562 /*
5563     Special version for direct calls from Fortran
5564 */
5565 #include <private/fortranimpl.h>
5566 
5567 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5568 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5569 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5570 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5571 #endif
5572 
5573 /* Change these macros so can be used in void function */
5574 #undef CHKERRQ
5575 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5576 #undef SETERRQ2
5577 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5578 #undef SETERRQ
5579 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5580 
5581 EXTERN_C_BEGIN
5582 #undef __FUNCT__
5583 #define __FUNCT__ "matsetvaluesmpiaij_"
5584 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5585 {
5586   Mat             mat = *mmat;
5587   PetscInt        m = *mm, n = *mn;
5588   InsertMode      addv = *maddv;
5589   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5590   PetscScalar     value;
5591   PetscErrorCode  ierr;
5592 
5593   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5594   if (mat->insertmode == NOT_SET_VALUES) {
5595     mat->insertmode = addv;
5596   }
5597 #if defined(PETSC_USE_DEBUG)
5598   else if (mat->insertmode != addv) {
5599     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5600   }
5601 #endif
5602   {
5603   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5604   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5605   PetscBool       roworiented = aij->roworiented;
5606 
5607   /* Some Variables required in the macro */
5608   Mat             A = aij->A;
5609   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5610   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5611   MatScalar       *aa = a->a;
5612   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5613   Mat             B = aij->B;
5614   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5615   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5616   MatScalar       *ba = b->a;
5617 
5618   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5619   PetscInt        nonew = a->nonew;
5620   MatScalar       *ap1,*ap2;
5621 
5622   PetscFunctionBegin;
5623   for (i=0; i<m; i++) {
5624     if (im[i] < 0) continue;
5625 #if defined(PETSC_USE_DEBUG)
5626     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5627 #endif
5628     if (im[i] >= rstart && im[i] < rend) {
5629       row      = im[i] - rstart;
5630       lastcol1 = -1;
5631       rp1      = aj + ai[row];
5632       ap1      = aa + ai[row];
5633       rmax1    = aimax[row];
5634       nrow1    = ailen[row];
5635       low1     = 0;
5636       high1    = nrow1;
5637       lastcol2 = -1;
5638       rp2      = bj + bi[row];
5639       ap2      = ba + bi[row];
5640       rmax2    = bimax[row];
5641       nrow2    = bilen[row];
5642       low2     = 0;
5643       high2    = nrow2;
5644 
5645       for (j=0; j<n; j++) {
5646         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5647         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5648         if (in[j] >= cstart && in[j] < cend){
5649           col = in[j] - cstart;
5650           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5651         } else if (in[j] < 0) continue;
5652 #if defined(PETSC_USE_DEBUG)
5653         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5654 #endif
5655         else {
5656           if (mat->was_assembled) {
5657             if (!aij->colmap) {
5658               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5659             }
5660 #if defined (PETSC_USE_CTABLE)
5661             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5662 	    col--;
5663 #else
5664             col = aij->colmap[in[j]] - 1;
5665 #endif
5666             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5667               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5668               col =  in[j];
5669               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5670               B = aij->B;
5671               b = (Mat_SeqAIJ*)B->data;
5672               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5673               rp2      = bj + bi[row];
5674               ap2      = ba + bi[row];
5675               rmax2    = bimax[row];
5676               nrow2    = bilen[row];
5677               low2     = 0;
5678               high2    = nrow2;
5679               bm       = aij->B->rmap->n;
5680               ba = b->a;
5681             }
5682           } else col = in[j];
5683           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5684         }
5685       }
5686     } else {
5687       if (!aij->donotstash) {
5688         if (roworiented) {
5689           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5690         } else {
5691           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5692         }
5693       }
5694     }
5695   }}
5696   PetscFunctionReturnVoid();
5697 }
5698 EXTERN_C_END
5699 
5700