1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
7 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
8 {
9   PetscErrorCode  ierr;
10   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
11   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
12   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
13   const PetscInt  *ia,*ib;
14   const MatScalar *aa,*bb;
15   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
16   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
17 
18   PetscFunctionBegin;
19   *keptrows = 0;
20   ia = a->i;
21   ib = b->i;
22   for (i=0; i<m; i++) {
23     na = ia[i+1] - ia[i];
24     nb = ib[i+1] - ib[i];
25     if (!na && !nb) {
26       cnt++;
27       goto ok1;
28     }
29     aa = a->a + ia[i];
30     for (j=0; j<na; j++) {
31       if (aa[j] != 0.0) goto ok1;
32     }
33     bb = b->a + ib[i];
34     for (j=0; j <nb; j++) {
35       if (bb[j] != 0.0) goto ok1;
36     }
37     cnt++;
38     ok1:;
39   }
40   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
41   if (!n0rows) PetscFunctionReturn(0);
42   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
43   cnt = 0;
44   for (i=0; i<m; i++) {
45     na = ia[i+1] - ia[i];
46     nb = ib[i+1] - ib[i];
47     if (!na && !nb) continue;
48     aa = a->a + ia[i];
49     for(j=0; j<na;j++) {
50       if (aa[j] != 0.0) {
51         rows[cnt++] = rstart + i;
52         goto ok2;
53       }
54     }
55     bb = b->a + ib[i];
56     for (j=0; j<nb; j++) {
57       if (bb[j] != 0.0) {
58         rows[cnt++] = rstart + i;
59         goto ok2;
60       }
61     }
62     ok2:;
63   }
64   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
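/*
   Usage sketch for the routine above (illustrative only; it is typically reached through the
   generic MatFindNonzeroRows() interface rather than called directly, and the variable names
   here are hypothetical). A NULL IS is returned when every row of the matrix already contains
   a nonzero entry:

     IS keptrows;
     ierr = MatFindNonZeroRows_MPIAIJ(M,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(keptrows);CHKERRQ(ierr);
     }
*/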
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "MatDistribute_MPIAIJ"
70 /*
71     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
72     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
73 
74     Only for square matrices
75 */
76 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
77 {
78   PetscMPIInt    rank,size;
79   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
80   PetscErrorCode ierr;
81   Mat            mat;
82   Mat_SeqAIJ     *gmata;
83   PetscMPIInt    tag;
84   MPI_Status     status;
85   PetscBool      aij;
86   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
87 
88   PetscFunctionBegin;
89   CHKMEMQ;
90   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
91   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
92   if (!rank) {
93     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
94     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
95   }
96   if (reuse == MAT_INITIAL_MATRIX) {
97     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
98     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
99     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
100     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
101     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
102     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
103     rowners[0] = 0;
104     for (i=2; i<=size; i++) {
105       rowners[i] += rowners[i-1];
106     }
107     rstart = rowners[rank];
108     rend   = rowners[rank+1];
109     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
110     if (!rank) {
111       gmata = (Mat_SeqAIJ*) gmat->data;
112       /* send row lengths to all processors */
113       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
114       for (i=1; i<size; i++) {
115         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
116       }
117       /* determine the number of diagonal and off-diagonal nonzeros in each row */
118       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
119       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
120       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
121       jj = 0;
122       for (i=0; i<m; i++) {
123         for (j=0; j<dlens[i]; j++) {
124           if (gmata->j[jj] < rstart) ld[i]++;
125           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
126           jj++;
127         }
128       }
129       /* send column indices to other processes */
130       for (i=1; i<size; i++) {
131         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
132         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
133         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
134       }
135 
136       /* send numerical values to other processes */
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       gmataa = gmata->a;
142       gmataj = gmata->j;
143 
144     } else {
145       /* receive row lengths */
146       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
147       /* receive column indices */
148       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
149       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
150       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
151       /* determine the number of diagonal and off-diagonal nonzeros in each row */
152       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
153       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
154       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
155       jj = 0;
156       for (i=0; i<m; i++) {
157         for (j=0; j<dlens[i]; j++) {
158           if (gmataj[jj] < rstart) ld[i]++;
159           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
160           jj++;
161         }
162       }
163       /* receive numerical values */
164       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
165       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
166     }
167     /* set preallocation */
168     for (i=0; i<m; i++) {
169       dlens[i] -= olens[i];
170     }
171     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
172     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
173 
174     for (i=0; i<m; i++) {
175       dlens[i] += olens[i];
176     }
177     cnt  = 0;
178     for (i=0; i<m; i++) {
179       row  = rstart + i;
180       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
181       cnt += dlens[i];
182     }
183     if (rank) {
184       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
185     }
186     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
187     ierr = PetscFree(rowners);CHKERRQ(ierr);
188     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
189     *inmat = mat;
190   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
191     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
192     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
193     mat   = *inmat;
194     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
195     if (!rank) {
196       /* send numerical values to other processes */
197       gmata = (Mat_SeqAIJ*) gmat->data;
198       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
199       gmataa = gmata->a;
200       for (i=1; i<size; i++) {
201         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
202         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
203       }
204       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
205     } else {
206       /* receive numerical values from process 0*/
207       nz   = Ad->nz + Ao->nz;
208       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
209       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
210     }
211     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
212     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
213     ad = Ad->a;
214     ao = Ao->a;
215     if (mat->rmap->n) {
216       i  = 0;
217       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
218       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
219     }
220     for (i=1; i<mat->rmap->n; i++) {
221       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
222       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
223     }
224     i--;
225     if (mat->rmap->n) {
226       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
227     }
228     if (rank) {
229       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
230     }
231   }
232   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234   CHKMEMQ;
235   PetscFunctionReturn(0);
236 }
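/*
   Sketch of how MatDistribute_MPIAIJ() above might be driven (illustrative only; gmat is a
   hypothetical SeqAIJ matrix that need only be valid on process 0, and mlocal is this
   process's share of the square global size, which the routine allgathers to build the row
   ownership ranges):

     Mat dist;
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     (later, after only the numerical values of gmat have changed)
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/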
237 
238 /*
239   Local utility routine that creates a mapping from the global column
240   number to the local number in the off-diagonal part of the local
241   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
242   a slightly higher hash-table cost; without it, it is not scalable (each process
243   stores an order-N integer array) but the lookup is fast.
244 */
245 #undef __FUNCT__
246 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
247 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
248 {
249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
250   PetscErrorCode ierr;
251   PetscInt       n = aij->B->cmap->n,i;
252 
253   PetscFunctionBegin;
254 #if defined (PETSC_USE_CTABLE)
255   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
256   for (i=0; i<n; i++){
257     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
258   }
259 #else
260   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
261   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
262   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
263   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
264 #endif
265   PetscFunctionReturn(0);
266 }
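/*
   Example of looking up a global column index in the colmap built above (a sketch of the
   pattern used later in MatSetValues_MPIAIJ(); gcol is a hypothetical global column index
   lying outside this process's diagonal block, and col becomes the corresponding local index
   into the off-diagonal part, or negative if that column is not present):

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
*/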
267 
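/*
   The following two macros insert a single (row,col,value) entry into the diagonal (A) or
   off-diagonal (B) SeqAIJ block during MatSetValues_MPIAIJ(). Each performs a short binary
   search (while the remaining range is larger than 5) followed by a linear scan to locate
   the column; if the column is already present the value is added (ADD_VALUES) or overwritten,
   otherwise the later entries of the row are shifted up one slot to make room, reallocating
   via MatSeqXAIJReallocateAIJ() when the row is full. Zero values are skipped when
   ignorezeroentries is set, and new nonzeros are silently dropped (nonew == 1) or raise an
   error (nonew == -1).
*/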
268 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
269 { \
270     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
271     lastcol1 = col;\
272     while (high1-low1 > 5) { \
273       t = (low1+high1)/2; \
274       if (rp1[t] > col) high1 = t; \
275       else             low1  = t; \
276     } \
277       for (_i=low1; _i<high1; _i++) { \
278         if (rp1[_i] > col) break; \
279         if (rp1[_i] == col) { \
280           if (addv == ADD_VALUES) ap1[_i] += value;   \
281           else                    ap1[_i] = value; \
282           goto a_noinsert; \
283         } \
284       }  \
285       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
286       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
287       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
288       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
289       N = nrow1++ - 1; a->nz++; high1++; \
290       /* shift up all the later entries in this row */ \
291       for (ii=N; ii>=_i; ii--) { \
292         rp1[ii+1] = rp1[ii]; \
293         ap1[ii+1] = ap1[ii]; \
294       } \
295       rp1[_i] = col;  \
296       ap1[_i] = value;  \
297       a_noinsert: ; \
298       ailen[row] = nrow1; \
299 }
300 
301 
302 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
303 { \
304     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
305     lastcol2 = col;\
306     while (high2-low2 > 5) { \
307       t = (low2+high2)/2; \
308       if (rp2[t] > col) high2 = t; \
309       else             low2  = t; \
310     } \
311     for (_i=low2; _i<high2; _i++) {		\
312       if (rp2[_i] > col) break;			\
313       if (rp2[_i] == col) {			      \
314 	if (addv == ADD_VALUES) ap2[_i] += value;     \
315 	else                    ap2[_i] = value;      \
316 	goto b_noinsert;			      \
317       }						      \
318     }							      \
319     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
320     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
321     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
322     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
323     N = nrow2++ - 1; b->nz++; high2++;					\
324     /* shift up all the later entries in this row */			\
325     for (ii=N; ii>=_i; ii--) {						\
326       rp2[ii+1] = rp2[ii];						\
327       ap2[ii+1] = ap2[ii];						\
328     }									\
329     rp2[_i] = col;							\
330     ap2[_i] = value;							\
331     b_noinsert: ;								\
332     bilen[row] = nrow2;							\
333 }
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
337 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
338 {
339   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
340   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
341   PetscErrorCode ierr;
342   PetscInt       l,*garray = mat->garray,diag;
343 
344   PetscFunctionBegin;
345   /* code only works for square matrices A */
346 
347   /* find size of row to the left of the diagonal part */
348   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
349   row  = row - diag;
350   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
351     if (garray[b->j[b->i[row]+l]] > diag) break;
352   }
353   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
354 
355   /* diagonal part */
356   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
357 
358   /* right of diagonal part */
359   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
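/*
   The values v[] passed to MatSetValuesRow_MPIAIJ() above must contain one entry per stored
   nonzero of the row, ordered by global column: first the off-diagonal entries to the left of
   this process's diagonal block (copied into B), then the diagonal-block entries (copied into
   A), then the remaining off-diagonal entries to the right (copied into B). For example, a row
   whose stored columns are {2, 7, 8, 15} on a process owning columns 5..9 is passed as
   v = { a(row,2), a(row,7), a(row,8), a(row,15) }.
*/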
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatSetValues_MPIAIJ"
365 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
366 {
367   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
368   PetscScalar    value;
369   PetscErrorCode ierr;
370   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
371   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
372   PetscBool      roworiented = aij->roworiented;
373 
374   /* Some Variables required in the macro */
375   Mat            A = aij->A;
376   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
377   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
378   MatScalar      *aa = a->a;
379   PetscBool      ignorezeroentries = a->ignorezeroentries;
380   Mat            B = aij->B;
381   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
382   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
383   MatScalar      *ba = b->a;
384 
385   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
386   PetscInt       nonew = a->nonew;
387   MatScalar      *ap1,*ap2;
388 
389   PetscFunctionBegin;
390   if (v) PetscValidScalarPointer(v,6);
391   for (i=0; i<m; i++) {
392     if (im[i] < 0) continue;
393 #if defined(PETSC_USE_DEBUG)
394     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
395 #endif
396     if (im[i] >= rstart && im[i] < rend) {
397       row      = im[i] - rstart;
398       lastcol1 = -1;
399       rp1      = aj + ai[row];
400       ap1      = aa + ai[row];
401       rmax1    = aimax[row];
402       nrow1    = ailen[row];
403       low1     = 0;
404       high1    = nrow1;
405       lastcol2 = -1;
406       rp2      = bj + bi[row];
407       ap2      = ba + bi[row];
408       rmax2    = bimax[row];
409       nrow2    = bilen[row];
410       low2     = 0;
411       high2    = nrow2;
412 
413       for (j=0; j<n; j++) {
414         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
415         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
416         if (in[j] >= cstart && in[j] < cend){
417           col = in[j] - cstart;
418           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_DEBUG)
421         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!aij->colmap) {
426               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
430             col--;
431 #else
432             col = aij->colmap[in[j]] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
435               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
438               B = aij->B;
439               b = (Mat_SeqAIJ*)B->data;
440               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
441               rp2      = bj + bi[row];
442               ap2      = ba + bi[row];
443               rmax2    = bimax[row];
444               nrow2    = bilen[row];
445               low2     = 0;
446               high2    = nrow2;
447               bm       = aij->B->rmap->n;
448               ba = b->a;
449             }
450           } else col = in[j];
451           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
452         }
453       }
454     } else {
455       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
456       if (!aij->donotstash) {
457         if (roworiented) {
458           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
459         } else {
460           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
461         }
462       }
463     }
464   }
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatGetValues_MPIAIJ"
470 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
471 {
472   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
473   PetscErrorCode ierr;
474   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
475   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
476 
477   PetscFunctionBegin;
478   for (i=0; i<m; i++) {
479     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
480     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
481     if (idxm[i] >= rstart && idxm[i] < rend) {
482       row = idxm[i] - rstart;
483       for (j=0; j<n; j++) {
484         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
485         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
486         if (idxn[j] >= cstart && idxn[j] < cend){
487           col = idxn[j] - cstart;
488           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
489         } else {
490           if (!aij->colmap) {
491             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
492           }
493 #if defined (PETSC_USE_CTABLE)
494           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
495           col --;
496 #else
497           col = aij->colmap[idxn[j]] - 1;
498 #endif
499           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
500           else {
501             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
502           }
503         }
504       }
505     } else {
506       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
507     }
508   }
509   PetscFunctionReturn(0);
510 }
511 
512 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
516 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
517 {
518   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
519   PetscErrorCode ierr;
520   PetscInt       nstash,reallocs;
521   InsertMode     addv;
522 
523   PetscFunctionBegin;
524   if (aij->donotstash || mat->nooffprocentries) {
525     PetscFunctionReturn(0);
526   }
527 
528   /* make sure all processes are using the same insert mode, either INSERT_VALUES or ADD_VALUES */
529   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
530   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
531   mat->insertmode = addv; /* in case this processor had no cache */
532 
533   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
534   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
535   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
541 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
542 {
543   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
544   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
545   PetscErrorCode ierr;
546   PetscMPIInt    n;
547   PetscInt       i,j,rstart,ncols,flg;
548   PetscInt       *row,*col;
549   PetscBool      other_disassembled;
550   PetscScalar    *val;
551   InsertMode     addv = mat->insertmode;
552 
553   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
554   PetscFunctionBegin;
555   if (!aij->donotstash && !mat->nooffprocentries) {
556     while (1) {
557       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
558       if (!flg) break;
559 
560       for (i=0; i<n;) {
561         /* Now identify the consecutive vals belonging to the same row */
562         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
563         if (j < n) ncols = j-i;
564         else       ncols = n-i;
565         /* Now assemble all these values with a single function call */
566         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
567         i = j;
568       }
569     }
570     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
571   }
572   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
573   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
574 
575   /* determine if any processor has disassembled; if so, we must
576      also disassemble ourselves, so that we may reassemble. */
577   /*
578      if nonzero structure of submatrix B cannot change then we know that
579      no processor disassembled thus we can skip this stuff
580   */
581   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
582     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
583     if (mat->was_assembled && !other_disassembled) {
584       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
585     }
586   }
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
589   }
590   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
591   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
592   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
596   aij->rowvalues = 0;
597 
598   /* used by MatAXPY() */
599   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
600   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
601 
602   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
603   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
609 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
610 {
611   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
612   PetscErrorCode ierr;
613 
614   PetscFunctionBegin;
615   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
616   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatZeroRows_MPIAIJ"
622 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
623 {
624   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
625   PetscErrorCode    ierr;
626   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
627   PetscInt          i,*owners = A->rmap->range;
628   PetscInt          *nprocs,j,idx,nsends,row;
629   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
630   PetscInt          *rvalues,count,base,slen,*source;
631   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
632   MPI_Comm          comm = ((PetscObject)A)->comm;
633   MPI_Request       *send_waits,*recv_waits;
634   MPI_Status        recv_status,*send_status;
635   const PetscScalar *xx;
636   PetscScalar       *bb;
637 #if defined(PETSC_DEBUG)
638   PetscBool      found = PETSC_FALSE;
639 #endif
640 
641   PetscFunctionBegin;
642   /*  first count number of contributors to each processor */
643   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
644   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
645   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
646   j = 0;
647   for (i=0; i<N; i++) {
648     if (lastidx > (idx = rows[i])) j = 0;
649     lastidx = idx;
650     for (; j<size; j++) {
651       if (idx >= owners[j] && idx < owners[j+1]) {
652         nprocs[2*j]++;
653         nprocs[2*j+1] = 1;
654         owner[i] = j;
655 #if defined(PETSC_DEBUG)
656         found = PETSC_TRUE;
657 #endif
658         break;
659       }
660     }
661 #if defined(PETSC_DEBUG)
662     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
663     found = PETSC_FALSE;
664 #endif
665   }
666   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
667 
668   if (A->nooffproczerorows) {
669     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
670     nrecvs = nsends;
671     nmax   = N;
672   } else {
673     /* inform other processors of number of messages and max length*/
674     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
675   }
676 
677   /* post receives:   */
678   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
679   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
680   for (i=0; i<nrecvs; i++) {
681     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
682   }
683 
684   /* do sends:
685      starts[i] gives the starting index in svalues for the row indices going to
686      the ith processor
687   */
688   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
689   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
690   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
691   starts[0] = 0;
692   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
693   for (i=0; i<N; i++) {
694     svalues[starts[owner[i]]++] = rows[i];
695   }
696 
697   starts[0] = 0;
698   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
699   count = 0;
700   for (i=0; i<size; i++) {
701     if (nprocs[2*i+1]) {
702       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
703     }
704   }
705   ierr = PetscFree(starts);CHKERRQ(ierr);
706 
707   base = owners[rank];
708 
709   /*  wait on receives */
710   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
711   count  = nrecvs; slen = 0;
712   while (count) {
713     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
714     /* unpack receives into our local space */
715     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
716     source[imdex]  = recv_status.MPI_SOURCE;
717     lens[imdex]    = n;
718     slen          += n;
719     count--;
720   }
721   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
722 
723   /* move the data into the send scatter */
724   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
725   count = 0;
726   for (i=0; i<nrecvs; i++) {
727     values = rvalues + i*nmax;
728     for (j=0; j<lens[i]; j++) {
729       lrows[count++] = values[j] - base;
730     }
731   }
732   ierr = PetscFree(rvalues);CHKERRQ(ierr);
733   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
734   ierr = PetscFree(owner);CHKERRQ(ierr);
735   ierr = PetscFree(nprocs);CHKERRQ(ierr);
736 
737   /* fix right hand side if needed */
738   if (x && b) {
739     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
740     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
741     for (i=0; i<slen; i++) {
742       bb[lrows[i]] = diag*xx[lrows[i]];
743     }
744     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
745     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
746   }
747   /*
748      Zero the required rows. If the "diagonal block" of the matrix
749      is square and the user wishes to set the diagonal, we use separate
750      code so that MatSetValues() is not called for each diagonal entry,
751      which would allocate new memory and trigger many mallocs, slowing things down.
752 
753   */
754   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
755   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
756   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
757     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
758   } else if (diag != 0.0) {
759     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
760     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
761       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
762 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
763     }
764     for (i = 0; i < slen; i++) {
765       row  = lrows[i] + rstart;
766       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
767     }
768     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
769     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
770   } else {
771     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
772   }
773   ierr = PetscFree(lrows);CHKERRQ(ierr);
774 
775   /* wait on sends */
776   if (nsends) {
777     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
778     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
779     ierr = PetscFree(send_status);CHKERRQ(ierr);
780   }
781   ierr = PetscFree(send_waits);CHKERRQ(ierr);
782   ierr = PetscFree(svalues);CHKERRQ(ierr);
783 
784   PetscFunctionReturn(0);
785 }
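/*
   Summary of the routine above: each process first routes every requested global row index to
   its owner (per-destination counts are accumulated in nprocs, receives are posted with
   MPI_Irecv(), and the indices are shipped with MPI_Isend()); the locally owned targets are
   collected into lrows; if x and b are provided the right-hand side is fixed so that
   b[row] = diag*x[row]; finally the rows of the local off-diagonal block l->B and diagonal
   block l->A are zeroed, with the diagonal value inserted when requested.
*/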
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
789 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
790 {
791   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
792   PetscErrorCode    ierr;
793   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
794   PetscInt          i,*owners = A->rmap->range;
795   PetscInt          *nprocs,j,idx,nsends;
796   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
797   PetscInt          *rvalues,count,base,slen,*source;
798   PetscInt          *lens,*lrows,*values,m;
799   MPI_Comm          comm = ((PetscObject)A)->comm;
800   MPI_Request       *send_waits,*recv_waits;
801   MPI_Status        recv_status,*send_status;
802   const PetscScalar *xx;
803   PetscScalar       *bb,*mask;
804   Vec               xmask,lmask;
805   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
806   const PetscInt    *aj, *ii,*ridx;
807   PetscScalar       *aa;
808 #if defined(PETSC_DEBUG)
809   PetscBool         found = PETSC_FALSE;
810 #endif
811 
812   PetscFunctionBegin;
813   /*  first count number of contributors to each processor */
814   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
815   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
816   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
817   j = 0;
818   for (i=0; i<N; i++) {
819     if (lastidx > (idx = rows[i])) j = 0;
820     lastidx = idx;
821     for (; j<size; j++) {
822       if (idx >= owners[j] && idx < owners[j+1]) {
823         nprocs[2*j]++;
824         nprocs[2*j+1] = 1;
825         owner[i] = j;
826 #if defined(PETSC_DEBUG)
827         found = PETSC_TRUE;
828 #endif
829         break;
830       }
831     }
832 #if defined(PETSC_DEBUG)
833     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
834     found = PETSC_FALSE;
835 #endif
836   }
837   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
838 
839   /* inform other processors of number of messages and max length*/
840   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
841 
842   /* post receives:   */
843   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
844   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
845   for (i=0; i<nrecvs; i++) {
846     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
847   }
848 
849   /* do sends:
850      starts[i] gives the starting index in svalues for the row indices going to
851      the ith processor
852   */
853   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
854   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
855   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
856   starts[0] = 0;
857   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
858   for (i=0; i<N; i++) {
859     svalues[starts[owner[i]]++] = rows[i];
860   }
861 
862   starts[0] = 0;
863   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
864   count = 0;
865   for (i=0; i<size; i++) {
866     if (nprocs[2*i+1]) {
867       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
868     }
869   }
870   ierr = PetscFree(starts);CHKERRQ(ierr);
871 
872   base = owners[rank];
873 
874   /*  wait on receives */
875   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
876   count  = nrecvs; slen = 0;
877   while (count) {
878     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
879     /* unpack receives into our local space */
880     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
881     source[imdex]  = recv_status.MPI_SOURCE;
882     lens[imdex]    = n;
883     slen          += n;
884     count--;
885   }
886   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
887 
888   /* move the data into the send scatter */
889   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
890   count = 0;
891   for (i=0; i<nrecvs; i++) {
892     values = rvalues + i*nmax;
893     for (j=0; j<lens[i]; j++) {
894       lrows[count++] = values[j] - base;
895     }
896   }
897   ierr = PetscFree(rvalues);CHKERRQ(ierr);
898   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
899   ierr = PetscFree(owner);CHKERRQ(ierr);
900   ierr = PetscFree(nprocs);CHKERRQ(ierr);
901   /* lrows are the local rows to be zeroed, slen is the number of local rows */
902 
903   /* zero diagonal part of matrix */
904   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
905 
906   /* handle off diagonal part of matrix */
907   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
908   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
909   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
910   for (i=0; i<slen; i++) {
911     bb[lrows[i]] = 1;
912   }
913   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
914   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecDestroy(xmask);CHKERRQ(ierr);
917   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
921   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
922 
923   /* remove zeroed rows of off diagonal matrix */
924   ii = aij->i;
925   for (i=0; i<slen; i++) {
926     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
927   }
928 
929   /* loop over all entries of the off-process part of the matrix, zeroing the removed columns */
930   if (aij->compressedrow.use){
931     m    = aij->compressedrow.nrows;
932     ii   = aij->compressedrow.i;
933     ridx = aij->compressedrow.rindex;
934     for (i=0; i<m; i++){
935       n   = ii[i+1] - ii[i];
936       aj  = aij->j + ii[i];
937       aa  = aij->a + ii[i];
938 
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           bb[*ridx] -= *aa*xx[*aj];
942           *aa        = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947       ridx++;
948     }
949   } else { /* do not use compressed row format */
950     m = l->B->rmap->n;
951     for (i=0; i<m; i++) {
952       n   = ii[i+1] - ii[i];
953       aj  = aij->j + ii[i];
954       aa  = aij->a + ii[i];
955       for (j=0; j<n; j++) {
956         if (PetscAbsScalar(mask[*aj])) {
957           bb[i] -= *aa*xx[*aj];
958           *aa    = 0.0;
959         }
960         aa++;
961         aj++;
962       }
963     }
964   }
965 
966   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
967   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
968   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   ierr = VecDestroy(lmask);CHKERRQ(ierr);
970   ierr = PetscFree(lrows);CHKERRQ(ierr);
971 
972   /* wait on sends */
973   if (nsends) {
974     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
975     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
976     ierr = PetscFree(send_status);CHKERRQ(ierr);
977   }
978   ierr = PetscFree(send_waits);CHKERRQ(ierr);
979   ierr = PetscFree(svalues);CHKERRQ(ierr);
980 
981   PetscFunctionReturn(0);
982 }
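/*
   The routine above handles the off-diagonal coupling with a mask vector: a global vector
   xmask is set to 1 in every row being zeroed, scattered into the ghosted layout lmask of
   l->lvec, and then every stored entry of the off-diagonal block whose (ghost) column is
   masked is subtracted from the right-hand side (bb[row] -= a_ij * x_j) and set to zero, so
   both the rows and the columns of the parallel matrix end up eliminated.
*/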
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMult_MPIAIJ"
986 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
987 {
988   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode ierr;
990   PetscInt       nt;
991 
992   PetscFunctionBegin;
993   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
994   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
995   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
997   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
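/*
   The multiply above overlaps communication with computation: the scatter of the needed
   off-process entries of xx into a->lvec is started, the local diagonal block a->A is applied
   to xx while those messages are in flight, and only after VecScatterEnd() does the
   off-diagonal block a->B multiply a->lvec and add its contribution into yy.
*/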
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1004 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1016 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1017 {
1018   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1019   PetscErrorCode ierr;
1020 
1021   PetscFunctionBegin;
1022   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1024   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1031 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1032 {
1033   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1034   PetscErrorCode ierr;
1035   PetscBool      merged;
1036 
1037   PetscFunctionBegin;
1038   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1039   /* do nondiagonal part */
1040   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1041   if (!merged) {
1042     /* send it on its way */
1043     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1044     /* do local part */
1045     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1046     /* receive remote parts: note this assumes the values are not actually
1047        added into yy until the VecScatterEnd() on the next line */
1048     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1049   } else {
1050     /* do local part */
1051     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1052     /* send it on its way */
1053     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054     /* values actually were received in the Begin() but we need to call this nop */
1055     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056   }
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 EXTERN_C_BEGIN
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1063 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1064 {
1065   MPI_Comm       comm;
1066   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1067   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1068   IS             Me,Notme;
1069   PetscErrorCode ierr;
1070   PetscInt       M,N,first,last,*notme,i;
1071   PetscMPIInt    size;
1072 
1073   PetscFunctionBegin;
1074 
1075   /* Easy test: symmetric diagonal block */
1076   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1077   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1078   if (!*f) PetscFunctionReturn(0);
1079   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1081   if (size == 1) PetscFunctionReturn(0);
1082 
1083   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1084   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1085   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1086   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1087   for (i=0; i<first; i++) notme[i] = i;
1088   for (i=last; i<M; i++) notme[i-last+first] = i;
1089   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1090   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1091   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1092   Aoff = Aoffs[0];
1093   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1094   Boff = Boffs[0];
1095   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1096   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1097   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1098   ierr = ISDestroy(Me);CHKERRQ(ierr);
1099   ierr = ISDestroy(Notme);CHKERRQ(ierr);
1100   ierr = PetscFree(notme);CHKERRQ(ierr);
1101 
1102   PetscFunctionReturn(0);
1103 }
1104 EXTERN_C_END
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1108 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   /* do nondiagonal part */
1115   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1116   /* send it on its way */
1117   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1118   /* do local part */
1119   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1120   /* receive remote parts */
1121   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 /*
1126   This only works correctly for square matrices where the subblock A->A is the
1127    diagonal block
1128 */
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1131 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1132 {
1133   PetscErrorCode ierr;
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135 
1136   PetscFunctionBegin;
1137   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1138   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1139   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatScale_MPIAIJ"
1145 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1146 {
1147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1148   PetscErrorCode ierr;
1149 
1150   PetscFunctionBegin;
1151   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1152   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatDestroy_MPIAIJ"
1158 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1159 {
1160   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164 #if defined(PETSC_USE_LOG)
1165   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1166 #endif
1167   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1168   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
1169   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
1170   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
1171 #if defined (PETSC_USE_CTABLE)
1172   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
1173 #else
1174   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1175 #endif
1176   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1177   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
1178   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
1179   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1180   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1181   ierr = PetscFree(aij);CHKERRQ(ierr);
1182 
1183   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1191   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1197 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1198 {
1199   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1200   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1201   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1202   PetscErrorCode    ierr;
1203   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1204   int               fd;
1205   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1206   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1207   PetscScalar       *column_values;
1208   PetscInt          message_count,flowcontrolcount;
1209 
1210   PetscFunctionBegin;
1211   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1212   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1213   nz   = A->nz + B->nz;
1214   if (!rank) {
1215     header[0] = MAT_FILE_CLASSID;
1216     header[1] = mat->rmap->N;
1217     header[2] = mat->cmap->N;
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1219     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1220     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1221     /* get largest number of rows any processor has */
1222     rlen = mat->rmap->n;
1223     range = mat->rmap->range;
1224     for (i=1; i<size; i++) {
1225       rlen = PetscMax(rlen,range[i+1] - range[i]);
1226     }
1227   } else {
1228     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1229     rlen = mat->rmap->n;
1230   }
1231 
1232   /* load up the local row counts */
1233   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1234   for (i=0; i<mat->rmap->n; i++) {
1235     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1236   }
1237 
1238   /* store the row lengths to the file */
1239   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1240   if (!rank) {
1241     MPI_Status status;
1242     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1243     for (i=1; i<size; i++) {
1244       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1245       rlen = range[i+1] - range[i];
1246       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1247       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1248     }
1249     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1250   } else {
1251     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1252     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1253     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1254   }
1255   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1256 
1257   /* load up the local column indices */
1258   nzmax = nz; /* process 0 needs as much space as the largest amount needed by any process */
1259   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1260   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1261   cnt  = 0;
1262   for (i=0; i<mat->rmap->n; i++) {
1263     for (j=B->i[i]; j<B->i[i+1]; j++) {
1264       if ( (col = garray[B->j[j]]) > cstart) break;
1265       column_indices[cnt++] = col;
1266     }
1267     for (k=A->i[i]; k<A->i[i+1]; k++) {
1268       column_indices[cnt++] = A->j[k] + cstart;
1269     }
1270     for (; j<B->i[i+1]; j++) {
1271       column_indices[cnt++] = garray[B->j[j]];
1272     }
1273   }
1274   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1275 
1276   /* store the column indices to the file */
1277   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1278   if (!rank) {
1279     MPI_Status status;
1280     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1281     for (i=1; i<size; i++) {
1282       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1283       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1284       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1285       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1286       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1287     }
1288     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1289   } else {
1290     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1291     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1292     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1293     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1294   }
1295   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1296 
1297   /* load up the local column values */
1298   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1299   cnt  = 0;
1300   for (i=0; i<mat->rmap->n; i++) {
1301     for (j=B->i[i]; j<B->i[i+1]; j++) {
1302       if ( garray[B->j[j]] > cstart) break;
1303       column_values[cnt++] = B->a[j];
1304     }
1305     for (k=A->i[i]; k<A->i[i+1]; k++) {
1306       column_values[cnt++] = A->a[k];
1307     }
1308     for (; j<B->i[i+1]; j++) {
1309       column_values[cnt++] = B->a[j];
1310     }
1311   }
1312   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1313 
1314   /* store the column values to the file */
1315   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1316   if (!rank) {
1317     MPI_Status status;
1318     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1319     for (i=1; i<size; i++) {
1320       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1321       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1322       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1323       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1324       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1325     }
1326     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1327   } else {
1328     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1329     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1331     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1332   }
1333   ierr = PetscFree(column_values);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
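/*
    Viewer back end shared by the ASCII, draw, socket and binary paths.
    The ASCII "info" formats print statistics taken from the local diagonal
    (A) and off-diagonal (B) blocks; binary output is delegated to
    MatView_MPIAIJ_Binary(). For full ASCII, draw or socket output the whole
    matrix is assembled onto process 0 as a temporary MATMPIAIJ (subject to
    the 1024-row limit that can be overridden with -mat_ascii_output_large)
    and then viewed through the singleton viewer.
*/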
1339 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1340 {
1341   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1342   PetscErrorCode    ierr;
1343   PetscMPIInt       rank = aij->rank,size = aij->size;
1344   PetscBool         isdraw,iascii,isbinary;
1345   PetscViewer       sviewer;
1346   PetscViewerFormat format;
1347 
1348   PetscFunctionBegin;
1349   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1350   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1351   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1352   if (iascii) {
1353     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1354     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1355       MatInfo    info;
1356       PetscInt   *inodes;
1357 
1358       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1359       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1360       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1361       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1362       if (!inodes) {
1363         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1364                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1365       } else {
1366         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1367                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1368       }
1369       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1370       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1371       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1372       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1373       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1374       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1375       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1376       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1377       PetscFunctionReturn(0);
1378     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1379       PetscInt   inodecount,inodelimit,*inodes;
1380       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1381       if (inodes) {
1382         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1383       } else {
1384         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1385       }
1386       PetscFunctionReturn(0);
1387     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1388       PetscFunctionReturn(0);
1389     }
1390   } else if (isbinary) {
1391     if (size == 1) {
1392       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1393       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1394     } else {
1395       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1396     }
1397     PetscFunctionReturn(0);
1398   } else if (isdraw) {
1399     PetscDraw  draw;
1400     PetscBool  isnull;
1401     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1402     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1403   }
1404 
1405   if (size == 1) {
1406     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1407     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1408   } else {
1409     /* assemble the entire matrix onto first processor. */
1410     Mat         A;
1411     Mat_SeqAIJ  *Aloc;
1412     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1413     MatScalar   *a;
1414 
1415     if (mat->rmap->N > 1024) {
1416       PetscBool  flg = PETSC_FALSE;
1417 
1418       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1419       if (!flg) {
1420         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1421       }
1422     }
1423 
1424     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1425     if (!rank) {
1426       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1427     } else {
1428       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1429     }
1430     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1431     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1432     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1433     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1434 
1435     /* copy over the A part */
1436     Aloc = (Mat_SeqAIJ*)aij->A->data;
1437     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1438     row = mat->rmap->rstart;
1439     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1440     for (i=0; i<m; i++) {
1441       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1442       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1443     }
1444     aj = Aloc->j;
1445     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1446 
1447     /* copy over the B part */
1448     Aloc = (Mat_SeqAIJ*)aij->B->data;
1449     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1450     row  = mat->rmap->rstart;
1451     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1452     ct   = cols;
1453     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1454     for (i=0; i<m; i++) {
1455       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1456       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1457     }
1458     ierr = PetscFree(ct);CHKERRQ(ierr);
1459     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1460     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461     /*
1462        Every process must participate in this call because the graphics waits are
1463        synchronized across all processes that share the PetscDraw object
1464     */
1465     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1466     if (!rank) {
1467       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1468       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1469       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1470       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1471     }
1472     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1473     ierr = MatDestroy(A);CHKERRQ(ierr);
1474   }
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatView_MPIAIJ"
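/*
    MatView() entry point for MPIAIJ matrices: ASCII, draw, socket and binary
    viewers are all handled by MatView_MPIAIJ_ASCIIorDraworSocket(); any other
    viewer type is reported as unsupported.
*/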
1480 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1481 {
1482   PetscErrorCode ierr;
1483   PetscBool      iascii,isdraw,issocket,isbinary;
1484 
1485   PetscFunctionBegin;
1486   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1487   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1488   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1489   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1490   if (iascii || isdraw || isbinary || issocket) {
1491     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1492   } else {
1493     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1494   }
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatSOR_MPIAIJ"
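/*
    SOR/Gauss-Seidel relaxation for MPIAIJ matrices. Only the "local" variants
    are supported: each outer iteration scatters the current solution into the
    ghost vector lvec, moves the off-process coupling to the right-hand side
    (bb1 = bb - B*lvec) and then runs lits sweeps of sequential SOR on the
    diagonal block A. The Eisenstat trick is handled separately; a true
    parallel (global ordering) SOR is not supported and raises an error.
    This routine is typically reached through PCSOR, e.g. with the options
    -pc_type sor -pc_sor_local_symmetric (a usage sketch, not part of this file).
*/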
1500 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1501 {
1502   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1503   PetscErrorCode ierr;
1504   Vec            bb1 = 0;
1505   PetscBool      hasop;
1506 
1507   PetscFunctionBegin;
1508   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1509     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1510   }
1511 
1512   if (flag == SOR_APPLY_UPPER) {
1513     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1514     PetscFunctionReturn(0);
1515   }
1516 
1517   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1518     if (flag & SOR_ZERO_INITIAL_GUESS) {
1519       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1520       its--;
1521     }
1522 
1523     while (its--) {
1524       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1525       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1526 
1527       /* update rhs: bb1 = bb - B*x */
1528       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1529       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1530 
1531       /* local sweep */
1532       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1533     }
1534   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1535     if (flag & SOR_ZERO_INITIAL_GUESS) {
1536       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1537       its--;
1538     }
1539     while (its--) {
1540       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1541       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1542 
1543       /* update rhs: bb1 = bb - B*x */
1544       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1545       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1546 
1547       /* local sweep */
1548       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1549     }
1550   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1551     if (flag & SOR_ZERO_INITIAL_GUESS) {
1552       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1553       its--;
1554     }
1555     while (its--) {
1556       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1557       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558 
1559       /* update rhs: bb1 = bb - B*x */
1560       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1561       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1562 
1563       /* local sweep */
1564       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1565     }
1566   }  else if (flag & SOR_EISENSTAT) {
1567     Vec         xx1;
1568 
1569     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1570     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1571 
1572     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1573     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1574     if (!mat->diag) {
1575       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1576       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1577     }
1578     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1579     if (hasop) {
1580       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1581     } else {
1582       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1583     }
1584     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1585 
1586     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1587 
1588     /* local sweep */
1589     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1590     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1591     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1592   } else {
1593     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1594   }
1595 
1596   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatPermute_MPIAIJ"
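/*
    Permutes the rows and columns of a parallel AIJ matrix, reached through
    MatPermute(A,rowp,colp,&B). The row permutation is gathered onto every
    process and inverted, then restricted to the locally owned rows; the
    column permutation is treated as a sequential permutation and inverted
    locally. The permuted matrix is extracted with
    MatGetSubMatrix_MPIAIJ_Private().
*/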
1602 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1603 {
1604   MPI_Comm       comm,pcomm;
1605   PetscInt       first,local_size,nrows;
1606   const PetscInt *rows;
1607   PetscMPIInt    size;
1608   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1609   PetscErrorCode ierr;
1610 
1611   PetscFunctionBegin;
1612   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1613   /* make a collective version of 'rowp' */
1614   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1615   if (pcomm==comm) {
1616     crowp = rowp;
1617   } else {
1618     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1619     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1620     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1621     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1622   }
1623   /* collect the global row permutation and invert it */
1624   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1625   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1626   if (pcomm!=comm) {
1627     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1628   }
1629   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1630   /* get the local target indices */
1631   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1632   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1633   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1634   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1635   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1636   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1637   /* the column permutation is so much easier;
1638      make a local version of 'colp' and invert it */
1639   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1640   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1641   if (size==1) {
1642     lcolp = colp;
1643   } else {
1644     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1645     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1646     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1647   }
1648   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1649   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1650   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1651   if (size>1) {
1652     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1653     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1654   }
1655   /* now we just get the submatrix */
1656   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1657   /* clean up */
1658   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1659   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1660   PetscFunctionReturn(0);
1661 }
1662 
1663 #undef __FUNCT__
1664 #define __FUNCT__ "MatGetInfo_MPIAIJ"
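/*
    Collects MatInfo statistics by summing the counters of the local diagonal
    (A) and off-diagonal (B) blocks; for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the
    five local values are combined across the communicator with a single
    MPI_Allreduce().
*/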
1665 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1666 {
1667   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1668   Mat            A = mat->A,B = mat->B;
1669   PetscErrorCode ierr;
1670   PetscReal      isend[5],irecv[5];
1671 
1672   PetscFunctionBegin;
1673   info->block_size     = 1.0;
1674   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1675   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1676   isend[3] = info->memory;  isend[4] = info->mallocs;
1677   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1678   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1679   isend[3] += info->memory;  isend[4] += info->mallocs;
1680   if (flag == MAT_LOCAL) {
1681     info->nz_used      = isend[0];
1682     info->nz_allocated = isend[1];
1683     info->nz_unneeded  = isend[2];
1684     info->memory       = isend[3];
1685     info->mallocs      = isend[4];
1686   } else if (flag == MAT_GLOBAL_MAX) {
1687     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1688     info->nz_used      = irecv[0];
1689     info->nz_allocated = irecv[1];
1690     info->nz_unneeded  = irecv[2];
1691     info->memory       = irecv[3];
1692     info->mallocs      = irecv[4];
1693   } else if (flag == MAT_GLOBAL_SUM) {
1694     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1695     info->nz_used      = irecv[0];
1696     info->nz_allocated = irecv[1];
1697     info->nz_unneeded  = irecv[2];
1698     info->memory       = irecv[3];
1699     info->mallocs      = irecv[4];
1700   }
1701   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1702   info->fill_ratio_needed = 0;
1703   info->factor_mallocs    = 0;
1704 
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatSetOption_MPIAIJ"
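/*
    Most options are forwarded to both sequential blocks a->A and a->B.
    MAT_IGNORE_OFF_PROC_ENTRIES and MAT_SPD only set flags on the parallel
    matrix, the symmetry-related options are recorded on the diagonal block
    only, and MAT_NEW_DIAGONALS is ignored.
*/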
1710 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1711 {
1712   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   switch (op) {
1717   case MAT_NEW_NONZERO_LOCATIONS:
1718   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1719   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1720   case MAT_KEEP_NONZERO_PATTERN:
1721   case MAT_NEW_NONZERO_LOCATION_ERR:
1722   case MAT_USE_INODES:
1723   case MAT_IGNORE_ZERO_ENTRIES:
1724     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1725     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1726     break;
1727   case MAT_ROW_ORIENTED:
1728     a->roworiented = flg;
1729     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1730     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1731     break;
1732   case MAT_NEW_DIAGONALS:
1733     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1734     break;
1735   case MAT_IGNORE_OFF_PROC_ENTRIES:
1736     a->donotstash = PETSC_TRUE;
1737     break;
1738   case MAT_SPD:
1739     A->spd_set                         = PETSC_TRUE;
1740     A->spd                             = flg;
1741     if (flg) {
1742       A->symmetric                     = PETSC_TRUE;
1743       A->structurally_symmetric        = PETSC_TRUE;
1744       A->symmetric_set                 = PETSC_TRUE;
1745       A->structurally_symmetric_set    = PETSC_TRUE;
1746     }
1747     break;
1748   case MAT_SYMMETRIC:
1749     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_STRUCTURALLY_SYMMETRIC:
1752     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1753     break;
1754   case MAT_HERMITIAN:
1755     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1756     break;
1757   case MAT_SYMMETRY_ETERNAL:
1758     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1759     break;
1760   default:
1761     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1762   }
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatGetRow_MPIAIJ"
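/*
    Returns one locally owned row in global column numbering by merging the
    corresponding rows of the diagonal block A and the off-diagonal block B
    (whose compressed column indices are translated through garray). The merge
    keeps the columns sorted: B entries to the left of the diagonal block,
    then the A entries, then the remaining B entries. Workspace sized for the
    longest local row is allocated on first use and kept in mat->rowvalues and
    mat->rowindices for reuse by later calls.
*/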
1768 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1769 {
1770   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1771   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1772   PetscErrorCode ierr;
1773   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1774   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1775   PetscInt       *cmap,*idx_p;
1776 
1777   PetscFunctionBegin;
1778   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1779   mat->getrowactive = PETSC_TRUE;
1780 
1781   if (!mat->rowvalues && (idx || v)) {
1782     /*
1783         allocate enough space to hold information from the longest row.
1784     */
1785     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1786     PetscInt   max = 1,tmp;
1787     for (i=0; i<matin->rmap->n; i++) {
1788       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1789       if (max < tmp) { max = tmp; }
1790     }
1791     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1792   }
1793 
1794   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1795   lrow = row - rstart;
1796 
1797   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1798   if (!v)   {pvA = 0; pvB = 0;}
1799   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1800   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1801   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1802   nztot = nzA + nzB;
1803 
1804   cmap  = mat->garray;
1805   if (v  || idx) {
1806     if (nztot) {
1807       /* Sort by increasing column numbers, assuming A and B already sorted */
1808       PetscInt imark = -1;
1809       if (v) {
1810         *v = v_p = mat->rowvalues;
1811         for (i=0; i<nzB; i++) {
1812           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1813           else break;
1814         }
1815         imark = i;
1816         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1817         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1818       }
1819       if (idx) {
1820         *idx = idx_p = mat->rowindices;
1821         if (imark > -1) {
1822           for (i=0; i<imark; i++) {
1823             idx_p[i] = cmap[cworkB[i]];
1824           }
1825         } else {
1826           for (i=0; i<nzB; i++) {
1827             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1828             else break;
1829           }
1830           imark = i;
1831         }
1832         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1833         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1834       }
1835     } else {
1836       if (idx) *idx = 0;
1837       if (v)   *v   = 0;
1838     }
1839   }
1840   *nz = nztot;
1841   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1842   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1848 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1849 {
1850   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1851 
1852   PetscFunctionBegin;
1853   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1854   aij->getrowactive = PETSC_FALSE;
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatNorm_MPIAIJ"
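/*
    Matrix norms for MPIAIJ, e.g. ierr = MatNorm(mat,NORM_INFINITY,&nrm);
      NORM_FROBENIUS - local sum of squares over both blocks, allreduce (sum),
                       then square root;
      NORM_1         - per-column absolute sums accumulated into an array of
                       length cmap->N, summed across processes, then the max;
      NORM_INFINITY  - per-row absolute sums reduced with a maximum.
    The two norm is not supported.
*/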
1860 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1861 {
1862   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1863   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1864   PetscErrorCode ierr;
1865   PetscInt       i,j,cstart = mat->cmap->rstart;
1866   PetscReal      sum = 0.0;
1867   MatScalar      *v;
1868 
1869   PetscFunctionBegin;
1870   if (aij->size == 1) {
1871     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1872   } else {
1873     if (type == NORM_FROBENIUS) {
1874       v = amat->a;
1875       for (i=0; i<amat->nz; i++) {
1876 #if defined(PETSC_USE_COMPLEX)
1877         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1878 #else
1879         sum += (*v)*(*v); v++;
1880 #endif
1881       }
1882       v = bmat->a;
1883       for (i=0; i<bmat->nz; i++) {
1884 #if defined(PETSC_USE_COMPLEX)
1885         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1886 #else
1887         sum += (*v)*(*v); v++;
1888 #endif
1889       }
1890       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1891       *norm = sqrt(*norm);
1892     } else if (type == NORM_1) { /* max column norm */
1893       PetscReal *tmp,*tmp2;
1894       PetscInt  *jj,*garray = aij->garray;
1895       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1896       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1897       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1898       *norm = 0.0;
1899       v = amat->a; jj = amat->j;
1900       for (j=0; j<amat->nz; j++) {
1901         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1902       }
1903       v = bmat->a; jj = bmat->j;
1904       for (j=0; j<bmat->nz; j++) {
1905         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1906       }
1907       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1908       for (j=0; j<mat->cmap->N; j++) {
1909         if (tmp2[j] > *norm) *norm = tmp2[j];
1910       }
1911       ierr = PetscFree(tmp);CHKERRQ(ierr);
1912       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1913     } else if (type == NORM_INFINITY) { /* max row norm */
1914       PetscReal ntemp = 0.0;
1915       for (j=0; j<aij->A->rmap->n; j++) {
1916         v = amat->a + amat->i[j];
1917         sum = 0.0;
1918         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1919           sum += PetscAbsScalar(*v); v++;
1920         }
1921         v = bmat->a + bmat->i[j];
1922         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1923           sum += PetscAbsScalar(*v); v++;
1924         }
1925         if (sum > ntemp) ntemp = sum;
1926       }
1927       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1928     } else {
1929       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1930     }
1931   }
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatTranspose_MPIAIJ"
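/*
    Forms the transpose by inserting each local row of A (and of the
    off-diagonal block, translated to global column numbers) as a column of
    the result with MatSetValues(). For MAT_INITIAL_MATRIX the diagonal
    preallocation d_nnz is computed exactly from the local A part and reused
    as an estimate for the off-diagonal part to avoid extra communication.
*/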
1937 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1938 {
1939   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1940   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1941   PetscErrorCode ierr;
1942   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1943   PetscInt       cstart=A->cmap->rstart,ncol;
1944   Mat            B;
1945   MatScalar      *array;
1946 
1947   PetscFunctionBegin;
1948   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"In-place transpose is only supported for square matrices");
1949 
1950   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1951   ai = Aloc->i; aj = Aloc->j;
1952   bi = Bloc->i; bj = Bloc->j;
1953   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1954     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1955     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1956     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1957     for (i=0; i<ai[ma]; i++){
1958       d_nnz[aj[i]] ++;
1959       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1960     }
1961 
1962     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1963     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1964     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1965     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1966     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1967   } else {
1968     B = *matout;
1969   }
1970 
1971   /* copy over the A part */
1972   array = Aloc->a;
1973   row = A->rmap->rstart;
1974   for (i=0; i<ma; i++) {
1975     ncol = ai[i+1]-ai[i];
1976     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1977     row++; array += ncol; aj += ncol;
1978   }
1979   aj = Aloc->j;
1980   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1981 
1982   /* copy over the B part */
1983   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1984   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1985   array = Bloc->a;
1986   row = A->rmap->rstart;
1987   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1988   cols_tmp = cols;
1989   for (i=0; i<mb; i++) {
1990     ncol = bi[i+1]-bi[i];
1991     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1992     row++; array += ncol; cols_tmp += ncol;
1993   }
1994   ierr = PetscFree(cols);CHKERRQ(ierr);
1995 
1996   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1997   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1998   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1999     *matout = B;
2000   } else {
2001     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2002   }
2003   PetscFunctionReturn(0);
2004 }
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
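/*
    Computes mat = diag(ll)*mat*diag(rr), reached through
    MatDiagonalScale(mat,ll,rr). The scatter of rr into the ghost vector lvec
    is started first so that the left scaling of the off-diagonal block and
    the scaling of the diagonal block overlap the communication; the right
    scaling of the off-diagonal block is done once the scatter completes.
*/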
2008 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2009 {
2010   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2011   Mat            a = aij->A,b = aij->B;
2012   PetscErrorCode ierr;
2013   PetscInt       s1,s2,s3;
2014 
2015   PetscFunctionBegin;
2016   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2017   if (rr) {
2018     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2019     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2020     /* Overlap communication with computation. */
2021     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2022   }
2023   if (ll) {
2024     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2025     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2026     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2027   }
2028   /* scale  the diagonal block */
2029   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2030 
2031   if (rr) {
2032     /* Do a scatter end and then right scale the off-diagonal block */
2033     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2034     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2035   }
2036 
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 #undef __FUNCT__
2041 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2042 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2043 {
2044   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2045   PetscErrorCode ierr;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2049   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2050   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2051   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2052   PetscFunctionReturn(0);
2053 }
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2056 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2057 {
2058   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2059   PetscErrorCode ierr;
2060 
2061   PetscFunctionBegin;
2062   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2063   PetscFunctionReturn(0);
2064 }
2065 
2066 #undef __FUNCT__
2067 #define __FUNCT__ "MatEqual_MPIAIJ"
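/*
    Two MPIAIJ matrices are equal when their diagonal blocks and their
    off-diagonal blocks agree on every process; the local results are combined
    with a logical-and reduction.
*/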
2068 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2069 {
2070   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2071   Mat            a,b,c,d;
2072   PetscBool      flg;
2073   PetscErrorCode ierr;
2074 
2075   PetscFunctionBegin;
2076   a = matA->A; b = matA->B;
2077   c = matB->A; d = matB->B;
2078 
2079   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2080   if (flg) {
2081     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2082   }
2083   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatCopy_MPIAIJ"
2089 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2090 {
2091   PetscErrorCode ierr;
2092   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2093   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2094 
2095   PetscFunctionBegin;
2096   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2097   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2098     /* because of the column compression in the off-processor part of the matrix a->B,
2099        the number of columns in a->B and b->B may be different, hence we cannot call
2100        the MatCopy() directly on the two parts. If need be, we can provide a more
2101        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2102        then copying the submatrices */
2103     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2104   } else {
2105     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2106     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2107   }
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 #undef __FUNCT__
2112 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2113 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2114 {
2115   PetscErrorCode ierr;
2116 
2117   PetscFunctionBegin;
2118   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatAXPY_MPIAIJ"
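/*
    Y = a*X + Y. With SAME_NONZERO_PATTERN the stored values of the two blocks
    are combined directly with BLAS axpy; with SUBSET_NONZERO_PATTERN the
    diagonal blocks use MatAXPY_SeqAIJ() and the off-diagonal blocks use a
    cached x-to-y index map (y->xtoy); otherwise a new matrix with the merged
    nonzero pattern is preallocated, filled with
    MatAXPY_BasicWithPreallocation() and swapped into Y via MatHeaderReplace().
*/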
2124 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2125 {
2126   PetscErrorCode ierr;
2127   PetscInt       i;
2128   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2129   PetscBLASInt   bnz,one=1;
2130   Mat_SeqAIJ     *x,*y;
2131 
2132   PetscFunctionBegin;
2133   if (str == SAME_NONZERO_PATTERN) {
2134     PetscScalar alpha = a;
2135     x = (Mat_SeqAIJ *)xx->A->data;
2136     y = (Mat_SeqAIJ *)yy->A->data;
2137     bnz = PetscBLASIntCast(x->nz);
2138     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2139     x = (Mat_SeqAIJ *)xx->B->data;
2140     y = (Mat_SeqAIJ *)yy->B->data;
2141     bnz = PetscBLASIntCast(x->nz);
2142     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2143   } else if (str == SUBSET_NONZERO_PATTERN) {
2144     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2145 
2146     x = (Mat_SeqAIJ *)xx->B->data;
2147     y = (Mat_SeqAIJ *)yy->B->data;
2148     if (y->xtoy && y->XtoY != xx->B) {
2149       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2150       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2151     }
2152     if (!y->xtoy) { /* get xtoy */
2153       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2154       y->XtoY = xx->B;
2155       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2156     }
2157     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2158   } else {
2159     Mat B;
2160     PetscInt *nnz_d,*nnz_o;
2161     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2162     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2163     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2164     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2165     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2166     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2167     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2168     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2169     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2170     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2171     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2172     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2173     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2174   }
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatConjugate_MPIAIJ"
2182 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2183 {
2184 #if defined(PETSC_USE_COMPLEX)
2185   PetscErrorCode ierr;
2186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2187 
2188   PetscFunctionBegin;
2189   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2190   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2191 #else
2192   PetscFunctionBegin;
2193 #endif
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 #undef __FUNCT__
2198 #define __FUNCT__ "MatRealPart_MPIAIJ"
2199 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2200 {
2201   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2202   PetscErrorCode ierr;
2203 
2204   PetscFunctionBegin;
2205   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2206   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2212 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2213 {
2214   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2215   PetscErrorCode ierr;
2216 
2217   PetscFunctionBegin;
2218   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2219   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 #ifdef PETSC_HAVE_PBGL
2224 
2225 #include <boost/parallel/mpi/bsp_process_group.hpp>
2226 #include <boost/graph/distributed/ilu_default_graph.hpp>
2227 #include <boost/graph/distributed/ilu_0_block.hpp>
2228 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2229 #include <boost/graph/distributed/petsc/interface.hpp>
2230 #include <boost/multi_array.hpp>
2231 #include <boost/parallel/distributed_property_map.hpp>
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2235 /*
2236   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2237 */
2238 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2239 {
2240   namespace petsc = boost::distributed::petsc;
2241 
2242   namespace graph_dist = boost::graph::distributed;
2243   using boost::graph::distributed::ilu_default::process_group_type;
2244   using boost::graph::ilu_permuted;
2245 
2246   PetscBool       row_identity, col_identity;
2247   PetscContainer  c;
2248   PetscInt        m, n, M, N;
2249   PetscErrorCode  ierr;
2250 
2251   PetscFunctionBegin;
2252   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2253   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2254   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2255   if (!row_identity || !col_identity) {
2256     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2257   }
2258 
2259   process_group_type pg;
2260   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2261   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2262   lgraph_type&   level_graph = *lgraph_p;
2263   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2264 
2265   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2266   ilu_permuted(level_graph);
2267 
2268   /* put together the new matrix */
2269   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2270   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2271   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2272   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2273   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2274   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2275   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2276 
2277   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2278   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2279   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 #undef __FUNCT__
2284 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2285 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2286 {
2287   PetscFunctionBegin;
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatSolve_MPIAIJ"
2293 /*
2294   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2295 */
2296 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2297 {
2298   namespace graph_dist = boost::graph::distributed;
2299 
2300   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2301   lgraph_type*   lgraph_p;
2302   PetscContainer c;
2303   PetscErrorCode ierr;
2304 
2305   PetscFunctionBegin;
2306   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2307   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2308   ierr = VecCopy(b, x);CHKERRQ(ierr);
2309 
2310   PetscScalar* array_x;
2311   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2312   PetscInt sx;
2313   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2314 
2315   PetscScalar* array_b;
2316   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2317   PetscInt sb;
2318   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2319 
2320   lgraph_type&   level_graph = *lgraph_p;
2321   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2322 
2323   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2324   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2325                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2326 
2327   typedef boost::iterator_property_map<array_ref_type::iterator,
2328                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2329   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2330                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2331 
2332   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2333 
2334   PetscFunctionReturn(0);
2335 }
2336 #endif
2337 
2338 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2339   PetscInt       nzlocal,nsends,nrecvs;
2340   PetscMPIInt    *send_rank,*recv_rank;
2341   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2342   PetscScalar    *sbuf_a,**rbuf_a;
2343   PetscErrorCode (*MatDestroy)(Mat);
2344 } Mat_Redundant;
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2348 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2349 {
2350   PetscErrorCode       ierr;
2351   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2352   PetscInt             i;
2353 
2354   PetscFunctionBegin;
2355   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2356   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2357   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2358   for (i=0; i<redund->nrecvs; i++){
2359     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2360     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2361   }
2362   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2363   ierr = PetscFree(redund);CHKERRQ(ierr);
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatDestroy_MatRedundant"
2369 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2370 {
2371   PetscErrorCode  ierr;
2372   PetscContainer  container;
2373   Mat_Redundant   *redund=PETSC_NULL;
2374 
2375   PetscFunctionBegin;
2376   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2377   if (container) {
2378     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2379   } else {
2380     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2381   }
2382   A->ops->destroy = redund->MatDestroy;
2383   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2384   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2385   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
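/*
    Builds, on each subcommunicator, a complete (redundant) copy of the
    parallel matrix. Every process packs its local rows in CSR form (row
    pointers and global column indices in sbuf_j, values in sbuf_a) and
    exchanges them with the corresponding processes of the other
    subcommunicators; leftover processes are folded into the last
    subcommunicator. For MAT_INITIAL_MATRIX the communication buffers are
    cached in a Mat_Redundant container attached to the result so that a
    later MAT_REUSE_MATRIX call only has to resend the numerical values.

    A minimal usage sketch (the sizes are placeholders, not taken from this
    file):

      Mat red;
      ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&red);CHKERRQ(ierr);
      ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&red);CHKERRQ(ierr);
*/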
2391 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2392 {
2393   PetscMPIInt    rank,size;
2394   MPI_Comm       comm=((PetscObject)mat)->comm;
2395   PetscErrorCode ierr;
2396   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2397   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2398   PetscInt       *rowrange=mat->rmap->range;
2399   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2400   Mat            A=aij->A,B=aij->B,C=*matredundant;
2401   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2402   PetscScalar    *sbuf_a;
2403   PetscInt       nzlocal=a->nz+b->nz;
2404   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2405   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2406   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2407   MatScalar      *aworkA,*aworkB;
2408   PetscScalar    *vals;
2409   PetscMPIInt    tag1,tag2,tag3,imdex;
2410   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2411                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2412   MPI_Status     recv_status,*send_status;
2413   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2414   PetscInt       **rbuf_j=PETSC_NULL;
2415   PetscScalar    **rbuf_a=PETSC_NULL;
2416   Mat_Redundant  *redund=PETSC_NULL;
2417   PetscContainer container;
2418 
2419   PetscFunctionBegin;
2420   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2421   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2422 
2423   if (reuse == MAT_REUSE_MATRIX) {
2424     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2425     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2426     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2427     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2428     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2429     if (container) {
2430       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2431     } else {
2432       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2433     }
2434     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2435 
2436     nsends    = redund->nsends;
2437     nrecvs    = redund->nrecvs;
2438     send_rank = redund->send_rank;
2439     recv_rank = redund->recv_rank;
2440     sbuf_nz   = redund->sbuf_nz;
2441     rbuf_nz   = redund->rbuf_nz;
2442     sbuf_j    = redund->sbuf_j;
2443     sbuf_a    = redund->sbuf_a;
2444     rbuf_j    = redund->rbuf_j;
2445     rbuf_a    = redund->rbuf_a;
2446   }
2447 
2448   if (reuse == MAT_INITIAL_MATRIX){
2449     PetscMPIInt  subrank,subsize;
2450     PetscInt     nleftover,np_subcomm;
2451     /* get the destination processors' id send_rank, nsends and nrecvs */
2452     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2453     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2454     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2455     np_subcomm = size/nsubcomm;
2456     nleftover  = size - nsubcomm*np_subcomm;
2457     nsends = 0; nrecvs = 0;
2458     for (i=0; i<size; i++){ /* i=rank*/
2459       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2460         send_rank[nsends] = i; nsends++;
2461         recv_rank[nrecvs++] = i;
2462       }
2463     }
2464     if (rank >= size - nleftover){/* this proc is a leftover processor */
2465       i = size-nleftover-1;
2466       j = 0;
2467       while (j < nsubcomm - nleftover){
2468         send_rank[nsends++] = i;
2469         i--; j++;
2470       }
2471     }
2472 
2473     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2474       for (i=0; i<nleftover; i++){
2475         recv_rank[nrecvs++] = size-nleftover+i;
2476       }
2477     }
2478 
2479     /* allocate sbuf_j, sbuf_a */
2480     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2481     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2482     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2483   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2484 
2485   /* copy mat's local entries into the buffers */
2486   if (reuse == MAT_INITIAL_MATRIX){
2487     rownz_max = 0;
2488     rptr = sbuf_j;
2489     cols = sbuf_j + rend-rstart + 1;
2490     vals = sbuf_a;
2491     rptr[0] = 0;
2492     for (i=0; i<rend-rstart; i++){
2493       row = i + rstart;
2494       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2495       ncols  = nzA + nzB;
2496       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2497       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2498       /* load the column indices for this row into cols */
2499       lwrite = 0;
2500       for (l=0; l<nzB; l++) {
2501         if ((ctmp = bmap[cworkB[l]]) < cstart){
2502           vals[lwrite]   = aworkB[l];
2503           cols[lwrite++] = ctmp;
2504         }
2505       }
2506       for (l=0; l<nzA; l++){
2507         vals[lwrite]   = aworkA[l];
2508         cols[lwrite++] = cstart + cworkA[l];
2509       }
2510       for (l=0; l<nzB; l++) {
2511         if ((ctmp = bmap[cworkB[l]]) >= cend){
2512           vals[lwrite]   = aworkB[l];
2513           cols[lwrite++] = ctmp;
2514         }
2515       }
2516       vals += ncols;
2517       cols += ncols;
2518       rptr[i+1] = rptr[i] + ncols;
2519       if (rownz_max < ncols) rownz_max = ncols;
2520     }
2521     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2522   } else { /* only copy matrix values into sbuf_a */
2523     rptr = sbuf_j;
2524     vals = sbuf_a;
2525     rptr[0] = 0;
2526     for (i=0; i<rend-rstart; i++){
2527       row = i + rstart;
2528       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2529       ncols  = nzA + nzB;
2530       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2531       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2532       lwrite = 0;
2533       for (l=0; l<nzB; l++) {
2534         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2535       }
2536       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2537       for (l=0; l<nzB; l++) {
2538         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2539       }
2540       vals += ncols;
2541       rptr[i+1] = rptr[i] + ncols;
2542     }
2543   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2544 
2545   /* send nzlocal to others, and recv other's nzlocal */
2546   /*--------------------------------------------------*/
2547   if (reuse == MAT_INITIAL_MATRIX){
2548     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2549     s_waits2 = s_waits3 + nsends;
2550     s_waits1 = s_waits2 + nsends;
2551     r_waits1 = s_waits1 + nsends;
2552     r_waits2 = r_waits1 + nrecvs;
2553     r_waits3 = r_waits2 + nrecvs;
2554   } else {
2555     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2556     r_waits3 = s_waits3 + nsends;
2557   }
2558 
2559   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2560   if (reuse == MAT_INITIAL_MATRIX){
2561     /* get new tags to keep the communication clean */
2562     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2563     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2564     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2565 
2566     /* post receives of other's nzlocal */
2567     for (i=0; i<nrecvs; i++){
2568       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2569     }
2570     /* send nzlocal to others */
2571     for (i=0; i<nsends; i++){
2572       sbuf_nz[i] = nzlocal;
2573       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2574     }
2575     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2576     count = nrecvs;
2577     while (count) {
2578       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2579       recv_rank[imdex] = recv_status.MPI_SOURCE;
2580       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2581       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2582 
2583       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2584       rbuf_nz[imdex] += i + 2;
2585       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2586       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2587       count--;
2588     }
2589     /* wait on sends of nzlocal */
2590     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2591     /* send mat->i,j to others, and recv from other's */
2592     /*------------------------------------------------*/
2593     for (i=0; i<nsends; i++){
2594       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2595       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2596     }
2597     /* wait on receives of mat->i,j */
2598     /*------------------------------*/
2599     count = nrecvs;
2600     while (count) {
2601       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2602       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2603       count--;
2604     }
2605     /* wait on sends of mat->i,j */
2606     /*---------------------------*/
2607     if (nsends) {
2608       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2609     }
2610   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2611 
2612   /* post receives, send and receive mat->a */
2613   /*----------------------------------------*/
2614   for (imdex=0; imdex<nrecvs; imdex++) {
2615     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2616   }
2617   for (i=0; i<nsends; i++){
2618     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2619   }
2620   count = nrecvs;
2621   while (count) {
2622     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2623     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2624     count--;
2625   }
2626   if (nsends) {
2627     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2628   }
2629 
2630   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2631 
2632   /* create redundant matrix */
2633   /*-------------------------*/
2634   if (reuse == MAT_INITIAL_MATRIX){
2635     /* compute rownz_max for preallocation */
2636     for (imdex=0; imdex<nrecvs; imdex++){
2637       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2638       rptr = rbuf_j[imdex];
2639       for (i=0; i<j; i++){
2640         ncols = rptr[i+1] - rptr[i];
2641         if (rownz_max < ncols) rownz_max = ncols;
2642       }
2643     }
2644 
2645     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2646     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2647     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2648     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2649     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2650   } else {
2651     C = *matredundant;
2652   }
2653 
2654   /* insert local matrix entries */
2655   rptr = sbuf_j;
2656   cols = sbuf_j + rend-rstart + 1;
2657   vals = sbuf_a;
2658   for (i=0; i<rend-rstart; i++){
2659     row   = i + rstart;
2660     ncols = rptr[i+1] - rptr[i];
2661     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2662     vals += ncols;
2663     cols += ncols;
2664   }
2665   /* insert received matrix entries */
2666   for (imdex=0; imdex<nrecvs; imdex++){
2667     rstart = rowrange[recv_rank[imdex]];
2668     rend   = rowrange[recv_rank[imdex]+1];
2669     rptr = rbuf_j[imdex];
2670     cols = rbuf_j[imdex] + rend-rstart + 1;
2671     vals = rbuf_a[imdex];
2672     for (i=0; i<rend-rstart; i++){
2673       row   = i + rstart;
2674       ncols = rptr[i+1] - rptr[i];
2675       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2676       vals += ncols;
2677       cols += ncols;
2678     }
2679   }
2680   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2681   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2682   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2683   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2684   if (reuse == MAT_INITIAL_MATRIX){
2685     PetscContainer container;
2686     *matredundant = C;
2687     /* create a supporting struct and attach it to C for reuse */
2688     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2689     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2690     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2691     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2692     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2693 
2694     redund->nzlocal = nzlocal;
2695     redund->nsends  = nsends;
2696     redund->nrecvs  = nrecvs;
2697     redund->send_rank = send_rank;
2698     redund->recv_rank = recv_rank;
2699     redund->sbuf_nz = sbuf_nz;
2700     redund->rbuf_nz = rbuf_nz;
2701     redund->sbuf_j  = sbuf_j;
2702     redund->sbuf_a  = sbuf_a;
2703     redund->rbuf_j  = rbuf_j;
2704     redund->rbuf_a  = rbuf_a;
2705 
2706     redund->MatDestroy = C->ops->destroy;
2707     C->ops->destroy    = MatDestroy_MatRedundant;
2708   }
2709   PetscFunctionReturn(0);
2710 }
2711 
2712 #undef __FUNCT__
2713 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2714 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2715 {
2716   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2717   PetscErrorCode ierr;
2718   PetscInt       i,*idxb = 0;
2719   PetscScalar    *va,*vb;
2720   Vec            vtmp;
2721 
2722   PetscFunctionBegin;
2723   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2724   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2725   if (idx) {
2726     for (i=0; i<A->rmap->n; i++) {
2727       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2728     }
2729   }
2730 
2731   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2732   if (idx) {
2733     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2734   }
2735   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2736   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2737 
2738   for (i=0; i<A->rmap->n; i++){
2739     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2740       va[i] = vb[i];
2741       if (idx) idx[i] = a->garray[idxb[i]];
2742     }
2743   }
2744 
2745   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2746   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2747   ierr = PetscFree(idxb);CHKERRQ(ierr);
2748   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 #undef __FUNCT__
2753 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2754 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2755 {
2756   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2757   PetscErrorCode ierr;
2758   PetscInt       i,*idxb = 0;
2759   PetscScalar    *va,*vb;
2760   Vec            vtmp;
2761 
2762   PetscFunctionBegin;
2763   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2764   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2765   if (idx) {
2766     for (i=0; i<A->rmap->n; i++) {
2767       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2768     }
2769   }
2770 
2771   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2772   if (idx) {
2773     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2774   }
2775   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2776   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2777 
2778   for (i=0; i<A->rmap->n; i++){
2779     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2780       va[i] = vb[i];
2781       if (idx) idx[i] = a->garray[idxb[i]];
2782     }
2783   }
2784 
2785   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2786   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2787   ierr = PetscFree(idxb);CHKERRQ(ierr);
2788   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2789   PetscFunctionReturn(0);
2790 }
2791 
2792 #undef __FUNCT__
2793 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2794 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2795 {
2796   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2797   PetscInt       n      = A->rmap->n;
2798   PetscInt       cstart = A->cmap->rstart;
2799   PetscInt      *cmap   = mat->garray;
2800   PetscInt      *diagIdx, *offdiagIdx;
2801   Vec            diagV, offdiagV;
2802   PetscScalar   *a, *diagA, *offdiagA;
2803   PetscInt       r;
2804   PetscErrorCode ierr;
2805 
2806   PetscFunctionBegin;
2807   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2808   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2809   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2810   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2811   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2812   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2813   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2814   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2815   for(r = 0; r < n; ++r) {
2816     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2817       a[r]   = diagA[r];
2818       idx[r] = cstart + diagIdx[r];
2819     } else {
2820       a[r]   = offdiagA[r];
2821       idx[r] = cmap[offdiagIdx[r]];
2822     }
2823   }
2824   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2825   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2826   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2827   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2828   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2829   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 #undef __FUNCT__
2834 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2835 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2836 {
2837   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2838   PetscInt       n      = A->rmap->n;
2839   PetscInt       cstart = A->cmap->rstart;
2840   PetscInt      *cmap   = mat->garray;
2841   PetscInt      *diagIdx, *offdiagIdx;
2842   Vec            diagV, offdiagV;
2843   PetscScalar   *a, *diagA, *offdiagA;
2844   PetscInt       r;
2845   PetscErrorCode ierr;
2846 
2847   PetscFunctionBegin;
2848   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2849   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2850   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2851   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2852   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2853   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2854   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2855   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2856   for(r = 0; r < n; ++r) {
2857     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2858       a[r]   = diagA[r];
2859       idx[r] = cstart + diagIdx[r];
2860     } else {
2861       a[r]   = offdiagA[r];
2862       idx[r] = cmap[offdiagIdx[r]];
2863     }
2864   }
2865   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2866   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2867   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2868   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2869   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2870   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2876 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2877 {
2878   PetscErrorCode ierr;
2879   Mat            *dummy;
2880 
2881   PetscFunctionBegin;
2882   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2883   *newmat = *dummy;
2884   ierr = PetscFree(dummy);CHKERRQ(ierr);
2885   PetscFunctionReturn(0);
2886 }
2887 
2888 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2889 /* -------------------------------------------------------------------*/
2890 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2891        MatGetRow_MPIAIJ,
2892        MatRestoreRow_MPIAIJ,
2893        MatMult_MPIAIJ,
2894 /* 4*/ MatMultAdd_MPIAIJ,
2895        MatMultTranspose_MPIAIJ,
2896        MatMultTransposeAdd_MPIAIJ,
2897 #ifdef PETSC_HAVE_PBGL
2898        MatSolve_MPIAIJ,
2899 #else
2900        0,
2901 #endif
2902        0,
2903        0,
2904 /*10*/ 0,
2905        0,
2906        0,
2907        MatSOR_MPIAIJ,
2908        MatTranspose_MPIAIJ,
2909 /*15*/ MatGetInfo_MPIAIJ,
2910        MatEqual_MPIAIJ,
2911        MatGetDiagonal_MPIAIJ,
2912        MatDiagonalScale_MPIAIJ,
2913        MatNorm_MPIAIJ,
2914 /*20*/ MatAssemblyBegin_MPIAIJ,
2915        MatAssemblyEnd_MPIAIJ,
2916        MatSetOption_MPIAIJ,
2917        MatZeroEntries_MPIAIJ,
2918 /*24*/ MatZeroRows_MPIAIJ,
2919        0,
2920 #ifdef PETSC_HAVE_PBGL
2921        0,
2922 #else
2923        0,
2924 #endif
2925        0,
2926        0,
2927 /*29*/ MatSetUpPreallocation_MPIAIJ,
2928 #ifdef PETSC_HAVE_PBGL
2929        0,
2930 #else
2931        0,
2932 #endif
2933        0,
2934        0,
2935        0,
2936 /*34*/ MatDuplicate_MPIAIJ,
2937        0,
2938        0,
2939        0,
2940        0,
2941 /*39*/ MatAXPY_MPIAIJ,
2942        MatGetSubMatrices_MPIAIJ,
2943        MatIncreaseOverlap_MPIAIJ,
2944        MatGetValues_MPIAIJ,
2945        MatCopy_MPIAIJ,
2946 /*44*/ MatGetRowMax_MPIAIJ,
2947        MatScale_MPIAIJ,
2948        0,
2949        0,
2950        MatZeroRowsColumns_MPIAIJ,
2951 /*49*/ MatSetBlockSize_MPIAIJ,
2952        0,
2953        0,
2954        0,
2955        0,
2956 /*54*/ MatFDColoringCreate_MPIAIJ,
2957        0,
2958        MatSetUnfactored_MPIAIJ,
2959        MatPermute_MPIAIJ,
2960        0,
2961 /*59*/ MatGetSubMatrix_MPIAIJ,
2962        MatDestroy_MPIAIJ,
2963        MatView_MPIAIJ,
2964        0,
2965        0,
2966 /*64*/ 0,
2967        0,
2968        0,
2969        0,
2970        0,
2971 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2972        MatGetRowMinAbs_MPIAIJ,
2973        0,
2974        MatSetColoring_MPIAIJ,
2975 #if defined(PETSC_HAVE_ADIC)
2976        MatSetValuesAdic_MPIAIJ,
2977 #else
2978        0,
2979 #endif
2980        MatSetValuesAdifor_MPIAIJ,
2981 /*75*/ MatFDColoringApply_AIJ,
2982        0,
2983        0,
2984        0,
2985        0,
2986 /*80*/ 0,
2987        0,
2988        0,
2989 /*83*/ MatLoad_MPIAIJ,
2990        0,
2991        0,
2992        0,
2993        0,
2994        0,
2995 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2996        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2997        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2998        MatPtAP_Basic,
2999        MatPtAPSymbolic_MPIAIJ,
3000 /*94*/ MatPtAPNumeric_MPIAIJ,
3001        0,
3002        0,
3003        0,
3004        0,
3005 /*99*/ 0,
3006        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3007        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3008        MatConjugate_MPIAIJ,
3009        0,
3010 /*104*/MatSetValuesRow_MPIAIJ,
3011        MatRealPart_MPIAIJ,
3012        MatImaginaryPart_MPIAIJ,
3013        0,
3014        0,
3015 /*109*/0,
3016        MatGetRedundantMatrix_MPIAIJ,
3017        MatGetRowMin_MPIAIJ,
3018        0,
3019        0,
3020 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3021        0,
3022        0,
3023        0,
3024        0,
3025 /*119*/0,
3026        0,
3027        0,
3028        0,
3029        MatGetMultiProcBlock_MPIAIJ,
3030 /*124*/MatFindNonZeroRows_MPIAIJ,
3031        0,
3032        0,
3033        0,
3034        MatGetSubMatricesParallel_MPIAIJ
3035 };
3036 
3037 /* ----------------------------------------------------------------------------------------*/
3038 
3039 EXTERN_C_BEGIN
3040 #undef __FUNCT__
3041 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3042 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3043 {
3044   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3045   PetscErrorCode ierr;
3046 
3047   PetscFunctionBegin;
3048   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3049   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3050   PetscFunctionReturn(0);
3051 }
3052 EXTERN_C_END
3053 
3054 EXTERN_C_BEGIN
3055 #undef __FUNCT__
3056 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3057 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3058 {
3059   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3060   PetscErrorCode ierr;
3061 
3062   PetscFunctionBegin;
3063   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3064   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3065   PetscFunctionReturn(0);
3066 }
3067 EXTERN_C_END
3068 
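/*
   Usage sketch (illustrative only; the matrix A and the loop condition refill are
   assumptions, not part of this file): MatStoreValues()/MatRetrieveValues() dispatch
   to the two routines above for MPIAIJ matrices and are typically used when a matrix
   with a fixed nonzero pattern is refilled many times.

      ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatStoreValues(A);CHKERRQ(ierr);          stash the current values
      while (refill) {
        ierr = MatRetrieveValues(A);CHKERRQ(ierr);     restore the stashed values
        ... add the changing contributions with MatSetValues(...,ADD_VALUES) ...
        ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      }
*/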
3069 EXTERN_C_BEGIN
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3072 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3073 {
3074   Mat_MPIAIJ     *b;
3075   PetscErrorCode ierr;
3076   PetscInt       i;
3077 
3078   PetscFunctionBegin;
3079   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3080   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3081   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3082   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3083 
3084   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3085   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3086   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3087   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3088   if (d_nnz) {
3089     for (i=0; i<B->rmap->n; i++) {
3090       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3091     }
3092   }
3093   if (o_nnz) {
3094     for (i=0; i<B->rmap->n; i++) {
3095       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3096     }
3097   }
3098   b = (Mat_MPIAIJ*)B->data;
3099 
3100   if (!B->preallocated) {
3101     /* Explicitly create 2 MATSEQAIJ matrices. */
3102     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3103     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3104     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3105     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3106     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3107     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3108     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3109     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3110   }
3111 
3112   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3113   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3114   B->preallocated = PETSC_TRUE;
3115   PetscFunctionReturn(0);
3116 }
3117 EXTERN_C_END
3118 
3119 #undef __FUNCT__
3120 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3121 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3122 {
3123   Mat            mat;
3124   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3125   PetscErrorCode ierr;
3126 
3127   PetscFunctionBegin;
3128   *newmat       = 0;
3129   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3130   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3131   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3132   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3133   a    = (Mat_MPIAIJ*)mat->data;
3134 
3135   mat->factortype    = matin->factortype;
3136   mat->rmap->bs      = matin->rmap->bs;
3137   mat->assembled    = PETSC_TRUE;
3138   mat->insertmode   = NOT_SET_VALUES;
3139   mat->preallocated = PETSC_TRUE;
3140 
3141   a->size           = oldmat->size;
3142   a->rank           = oldmat->rank;
3143   a->donotstash     = oldmat->donotstash;
3144   a->roworiented    = oldmat->roworiented;
3145   a->rowindices     = 0;
3146   a->rowvalues      = 0;
3147   a->getrowactive   = PETSC_FALSE;
3148 
3149   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3150   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3151 
3152   if (oldmat->colmap) {
3153 #if defined (PETSC_USE_CTABLE)
3154     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3155 #else
3156     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3157     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3158     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3159 #endif
3160   } else a->colmap = 0;
3161   if (oldmat->garray) {
3162     PetscInt len;
3163     len  = oldmat->B->cmap->n;
3164     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3165     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3166     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3167   } else a->garray = 0;
3168 
3169   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3170   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3171   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3172   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3173   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3174   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3175   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3176   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3177   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3178   *newmat = mat;
3179   PetscFunctionReturn(0);
3180 }
3181 
3182 /*
3183     Allows sending/receiving messages larger than 2 gigabytes in a single call
3184 */
3185 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3186 {
3187   int             ierr;
3188   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3189   PetscInt        i,numchunks;
3190   PetscMPIInt     icnt;
3191 
3192   numchunks = cnt/CHUNKSIZE + 1;
3193   for (i=0; i<numchunks; i++) {
3194     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3195     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
3196     if (type == MPIU_INT) {
3197       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3198     } else if (type == MPIU_SCALAR) {
3199       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3200     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3201   }
3202   return 0;
3203 }
3204 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3205 {
3206   int             ierr;
3207   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3208   MPI_Status      status;
3209   PetscInt        i,numchunks;
3210   PetscMPIInt     icnt;
3211 
3212   numchunks = cnt/CHUNKSIZE + 1;
3213   for (i=0; i<numchunks; i++) {
3214     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3215     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
3216     if (type == MPIU_INT) {
3217       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3218     } else if (type == MPIU_SCALAR) {
3219       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3220     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3221   }
3222   return 0;
3223 }
3224 
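/*
   Usage sketch for the two static helpers above (the ranks, buffer buf, count n and
   tag are illustrative): process 0 ships a PetscInt array whose length may exceed what
   a single MPI message can carry, and process 1 posts the matching chunked receive
   with the same total count and datatype.

      if (!rank) {
        ierr = MPILong_Send(buf,n,MPIU_INT,1,tag,comm);CHKERRQ(ierr);
      } else if (rank == 1) {
        ierr = MPILong_Recv(buf,n,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
      }
*/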
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatLoad_MPIAIJ"
3227 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3228 {
3229   PetscScalar    *vals,*svals;
3230   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3231   PetscErrorCode ierr;
3232   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3233   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3234   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3235   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3236   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3237   int            fd;
3238 
3239   PetscFunctionBegin;
3240   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3241   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3242   if (!rank) {
3243     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3244     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3245     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3246   }
3247 
3248   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3249 
3250   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3251   M = header[1]; N = header[2];
3252   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3253   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3254   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3255 
3256   /* If global sizes are set, check if they are consistent with that given in the file */
3257   if (sizesset) {
3258     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3259   }
3260   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3261   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3262 
3263   /* determine ownership of all rows */
3264   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3265   else m = newMat->rmap->n; /* Set by user */
3266 
3267   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3268   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3269 
3270   /* First process needs enough room for process with most rows */
3271   if (!rank) {
3272     mmax       = rowners[1];
3273     for (i=2; i<=size; i++) {
3274       mmax = PetscMax(mmax,rowners[i]);
3275     }
3276   } else mmax = m;
3277 
3278   rowners[0] = 0;
3279   for (i=2; i<=size; i++) {
3280     rowners[i] += rowners[i-1];
3281   }
3282   rstart = rowners[rank];
3283   rend   = rowners[rank+1];
3284 
3285   /* distribute row lengths to all processors */
3286   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3287   if (!rank) {
3288     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3289     ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3290     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3291     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3292     for (j=0; j<m; j++) {
3293       procsnz[0] += ourlens[j];
3294     }
3295     for (i=1; i<size; i++) {
3296       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3297       /* calculate the number of nonzeros on each processor */
3298       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3299         procsnz[i] += rowlengths[j];
3300       }
3301       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3302     }
3303     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3304   } else {
3305     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3306   }
3307 
3308   if (!rank) {
3309     /* determine max buffer needed and allocate it */
3310     maxnz = 0;
3311     for (i=0; i<size; i++) {
3312       maxnz = PetscMax(maxnz,procsnz[i]);
3313     }
3314     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3315 
3316     /* read in my part of the matrix column indices  */
3317     nz   = procsnz[0];
3318     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3319     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3320 
3321     /* read in everyone else's and ship them off */
3322     for (i=1; i<size; i++) {
3323       nz     = procsnz[i];
3324       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3325       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3326     }
3327     ierr = PetscFree(cols);CHKERRQ(ierr);
3328   } else {
3329     /* determine buffer space needed for message */
3330     nz = 0;
3331     for (i=0; i<m; i++) {
3332       nz += ourlens[i];
3333     }
3334     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3335 
3336     /* receive message of column indices*/
3337     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3338   }
3339 
3340   /* determine column ownership if matrix is not square */
3341   if (N != M) {
3342     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3343     else n = newMat->cmap->n;
3344     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3345     cstart = cend - n;
3346   } else {
3347     cstart = rstart;
3348     cend   = rend;
3349     n      = cend - cstart;
3350   }
3351 
3352   /* loop over local rows, determining number of off diagonal entries */
3353   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3354   jj = 0;
3355   for (i=0; i<m; i++) {
3356     for (j=0; j<ourlens[i]; j++) {
3357       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3358       jj++;
3359     }
3360   }
3361 
3362   for (i=0; i<m; i++) {
3363     ourlens[i] -= offlens[i];
3364   }
3365   if (!sizesset) {
3366     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3367   }
3368   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3369 
3370   for (i=0; i<m; i++) {
3371     ourlens[i] += offlens[i];
3372   }
3373 
3374   if (!rank) {
3375     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3376 
3377     /* read in my part of the matrix numerical values  */
3378     nz   = procsnz[0];
3379     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3380 
3381     /* insert into matrix */
3382     jj      = rstart;
3383     smycols = mycols;
3384     svals   = vals;
3385     for (i=0; i<m; i++) {
3386       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3387       smycols += ourlens[i];
3388       svals   += ourlens[i];
3389       jj++;
3390     }
3391 
3392     /* read in other processors and ship out */
3393     for (i=1; i<size; i++) {
3394       nz     = procsnz[i];
3395       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3396       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3397     }
3398     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3399   } else {
3400     /* receive numeric values */
3401     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3402 
3403     /* receive message of values*/
3404     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3405 
3406     /* insert into matrix */
3407     jj      = rstart;
3408     smycols = mycols;
3409     svals   = vals;
3410     for (i=0; i<m; i++) {
3411       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3412       smycols += ourlens[i];
3413       svals   += ourlens[i];
3414       jj++;
3415     }
3416   }
3417   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3418   ierr = PetscFree(vals);CHKERRQ(ierr);
3419   ierr = PetscFree(mycols);CHKERRQ(ierr);
3420   ierr = PetscFree(rowners);CHKERRQ(ierr);
3421 
3422   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3423   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3424   PetscFunctionReturn(0);
3425 }
3426 
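/*
   Usage sketch (the file name matrix.dat is illustrative): MatLoad() calls
   MatLoad_MPIAIJ() above when the matrix type is MATMPIAIJ, reading a PETSc binary
   file previously written with MatView() and a binary viewer.

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/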
3427 #undef __FUNCT__
3428 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3429 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3430 {
3431   PetscErrorCode ierr;
3432   IS             iscol_local;
3433   PetscInt       csize;
3434 
3435   PetscFunctionBegin;
3436   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3437   if (call == MAT_REUSE_MATRIX) {
3438     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3439     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3440   } else {
3441     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3442   }
3443   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3444   if (call == MAT_INITIAL_MATRIX) {
3445     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3446     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3447   }
3448   PetscFunctionReturn(0);
3449 }
3450 
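/*
   Usage sketch (the stride index sets and the counts nloc and first are illustrative):
   the public MatGetSubMatrix() reaches MatGetSubMatrix_MPIAIJ() above for MPIAIJ
   matrices; a later call with MAT_REUSE_MATRIX refills the previously created
   submatrix.

      IS  isrow,iscol;
      Mat sub;
      ierr = ISCreateStride(comm,nloc,first,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,nloc,first,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
      ierr = ISDestroy(isrow);CHKERRQ(ierr);
      ierr = ISDestroy(iscol);CHKERRQ(ierr);
      ierr = MatDestroy(sub);CHKERRQ(ierr);
*/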
3451 #undef __FUNCT__
3452 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3453 /*
3454     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3455   process, and then the final result is obtained by concatenating the local matrices.
3456   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3457 
3458   Note: This requires a sequential iscol with all indices.
3459 */
3460 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3461 {
3462   PetscErrorCode ierr;
3463   PetscMPIInt    rank,size;
3464   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3465   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3466   Mat            *local,M,Mreuse;
3467   MatScalar      *vwork,*aa;
3468   MPI_Comm       comm = ((PetscObject)mat)->comm;
3469   Mat_SeqAIJ     *aij;
3470 
3471 
3472   PetscFunctionBegin;
3473   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3474   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3475 
3476   if (call ==  MAT_REUSE_MATRIX) {
3477     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3478     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3479     local = &Mreuse;
3480     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3481   } else {
3482     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3483     Mreuse = *local;
3484     ierr   = PetscFree(local);CHKERRQ(ierr);
3485   }
3486 
3487   /*
3488       m - number of local rows
3489       n - number of columns (same on all processors)
3490       rstart - first row in new global matrix generated
3491   */
3492   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3493   if (call == MAT_INITIAL_MATRIX) {
3494     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3495     ii  = aij->i;
3496     jj  = aij->j;
3497 
3498     /*
3499         Determine the number of non-zeros in the diagonal and off-diagonal
3500         portions of the matrix in order to do correct preallocation
3501     */
3502 
3503     /* first get start and end of "diagonal" columns */
3504     if (csize == PETSC_DECIDE) {
3505       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3506       if (mglobal == n) { /* square matrix */
3507 	nlocal = m;
3508       } else {
3509         nlocal = n/size + ((n % size) > rank);
3510       }
3511     } else {
3512       nlocal = csize;
3513     }
3514     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3515     rstart = rend - nlocal;
3516     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3517 
3518     /* next, compute all the lengths */
3519     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3520     olens = dlens + m;
3521     for (i=0; i<m; i++) {
3522       jend = ii[i+1] - ii[i];
3523       olen = 0;
3524       dlen = 0;
3525       for (j=0; j<jend; j++) {
3526         if (*jj < rstart || *jj >= rend) olen++;
3527         else dlen++;
3528         jj++;
3529       }
3530       olens[i] = olen;
3531       dlens[i] = dlen;
3532     }
3533     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3534     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3535     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3536     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3537     ierr = PetscFree(dlens);CHKERRQ(ierr);
3538   } else {
3539     PetscInt ml,nl;
3540 
3541     M = *newmat;
3542     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3543     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3544     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3545     /*
3546          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3547        rather than the slower MatSetValues().
3548     */
3549     M->was_assembled = PETSC_TRUE;
3550     M->assembled     = PETSC_FALSE;
3551   }
3552   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3553   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3554   ii  = aij->i;
3555   jj  = aij->j;
3556   aa  = aij->a;
3557   for (i=0; i<m; i++) {
3558     row   = rstart + i;
3559     nz    = ii[i+1] - ii[i];
3560     cwork = jj;     jj += nz;
3561     vwork = aa;     aa += nz;
3562     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3563   }
3564 
3565   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3566   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3567   *newmat = M;
3568 
3569   /* save submatrix used in processor for next request */
3570   if (call ==  MAT_INITIAL_MATRIX) {
3571     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3572     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3573   }
3574 
3575   PetscFunctionReturn(0);
3576 }
3577 
3578 EXTERN_C_BEGIN
3579 #undef __FUNCT__
3580 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3581 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3582 {
3583   PetscInt       m,cstart, cend,j,nnz,i,d;
3584   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3585   const PetscInt *JJ;
3586   PetscScalar    *values;
3587   PetscErrorCode ierr;
3588 
3589   PetscFunctionBegin;
3590   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3591 
3592   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3593   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3594   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3595   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3596   m      = B->rmap->n;
3597   cstart = B->cmap->rstart;
3598   cend   = B->cmap->rend;
3599   rstart = B->rmap->rstart;
3600 
3601   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3602 
3603 #if defined(PETSC_USE_DEBUG)
3604   for (i=0; i<m; i++) {
3605     nnz     = Ii[i+1]- Ii[i];
3606     JJ      = J + Ii[i];
3607     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3608     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3609     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3610   }
3611 #endif
3612 
3613   for (i=0; i<m; i++) {
3614     nnz     = Ii[i+1]- Ii[i];
3615     JJ      = J + Ii[i];
3616     nnz_max = PetscMax(nnz_max,nnz);
3617     d       = 0;
3618     for (j=0; j<nnz; j++) {
3619       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3620     }
3621     d_nnz[i] = d;
3622     o_nnz[i] = nnz - d;
3623   }
3624   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3625   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3626 
3627   if (v) values = (PetscScalar*)v;
3628   else {
3629     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3630     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3631   }
3632 
3633   for (i=0; i<m; i++) {
3634     ii   = i + rstart;
3635     nnz  = Ii[i+1]- Ii[i];
3636     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3637   }
3638   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3639   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3640 
3641   if (!v) {
3642     ierr = PetscFree(values);CHKERRQ(ierr);
3643   }
3644   PetscFunctionReturn(0);
3645 }
3646 EXTERN_C_END
3647 
3648 #undef __FUNCT__
3649 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3650 /*@
3651    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3652    (the default parallel PETSc format).
3653 
3654    Collective on MPI_Comm
3655 
3656    Input Parameters:
3657 +  B - the matrix
3658 .  i - the indices into j for the start of each local row (starts with zero)
3659 .  j - the column indices for each local row (starts with zero)
3660 -  v - optional values in the matrix
3661 
3662    Level: developer
3663 
3664    Notes:
3665        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3666      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3667      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3668 
3669        The i and j indices are 0 based, and the values in i are offsets into the local j array.
3670 
3671        The format used for the sparse matrix input is equivalent to a
3672     row-major ordering, i.e. for the following matrix, the input data expected is
3673     as shown:
3674 
3675         1 0 0
3676         2 0 3     P0
3677        -------
3678         4 5 6     P1
3679 
3680      Process0 [P0]: rows_owned=[0,1]
3681         i =  {0,1,3}  [size = nrow+1  = 2+1]
3682         j =  {0,0,2}  [size = nz = 3]
3683         v =  {1,2,3}  [size = nz = 3]
3684 
3685      Process1 [P1]: rows_owned=[2]
3686         i =  {0,3}    [size = nrow+1  = 1+1]
3687         j =  {0,1,2}  [size = nz = 3]
3688         v =  {4,5,6}  [size = nz = 3]
3689 
3690 .keywords: matrix, aij, compressed row, sparse, parallel
3691 
3692 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3693           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3694 @*/
3695 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3696 {
3697   PetscErrorCode ierr;
3698 
3699   PetscFunctionBegin;
3700   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3701   PetscFunctionReturn(0);
3702 }
3703 
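/*
   Usage sketch for the 3x3 example in the manual page above (the variable names and
   the local row count nlocal are illustrative; nlocal is 2 on process 0 and 1 on
   process 1). The routine both preallocates and fills B, so no further assembly
   calls are needed.

      Process 0 owns rows 0-1:  i[] = {0,1,3};  j[] = {0,0,2};  v[] = {1.,2.,3.};
      Process 1 owns row  2:    i[] = {0,3};    j[] = {0,1,2};  v[] = {4.,5.,6.};

      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,nlocal,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/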
3704 #undef __FUNCT__
3705 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3706 /*@C
3707    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3708    (the default parallel PETSc format).  For good matrix assembly performance
3709    the user should preallocate the matrix storage by setting the parameters
3710    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3711    performance can be increased by more than a factor of 50.
3712 
3713    Collective on MPI_Comm
3714 
3715    Input Parameters:
3716 +  A - the matrix
3717 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3718            (same value is used for all local rows)
3719 .  d_nnz - array containing the number of nonzeros in the various rows of the
3720            DIAGONAL portion of the local submatrix (possibly different for each row)
3721            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3722            The size of this array is equal to the number of local rows, i.e 'm'.
3723            You must leave room for the diagonal entry even if it is zero.
3724 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3725            submatrix (same value is used for all local rows).
3726 -  o_nnz - array containing the number of nonzeros in the various rows of the
3727            OFF-DIAGONAL portion of the local submatrix (possibly different for
3728            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3729            structure. The size of this array is equal to the number
3730            of local rows, i.e 'm'.
3731 
3732    If the *_nnz parameter is given then the *_nz parameter is ignored
3733 
3734    The AIJ format (also called the Yale sparse matrix format or
3735    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3736    storage.  The stored row and column indices begin with zero.
3737    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3738 
3739    The parallel matrix is partitioned such that the first m0 rows belong to
3740    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3741    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3742 
3743    The DIAGONAL portion of the local submatrix of a processor can be defined
3744    as the submatrix which is obtained by extracting the part corresponding to
3745    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3746    first row that belongs to the processor, r2 is the last row belonging to
3747    this processor, and c1-c2 is the range of indices of the local part of a
3748    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3749    common case of a square matrix, the row and column ranges are the same and
3750    the DIAGONAL part is also square. The remaining portion of the local
3751    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3752 
3753    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3754 
3755    You can call MatGetInfo() to get information on how effective the preallocation was;
3756    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3757    You can also run with the option -info and look for messages with the string
3758    malloc in them to see if additional memory allocation was needed.
3759 
3760    Example usage:
3761 
3762    Consider the following 8x8 matrix with 34 non-zero values, that is
3763    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3764    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3765    as follows:
3766 
3767 .vb
3768             1  2  0  |  0  3  0  |  0  4
3769     Proc0   0  5  6  |  7  0  0  |  8  0
3770             9  0 10  | 11  0  0  | 12  0
3771     -------------------------------------
3772            13  0 14  | 15 16 17  |  0  0
3773     Proc1   0 18  0  | 19 20 21  |  0  0
3774             0  0  0  | 22 23  0  | 24  0
3775     -------------------------------------
3776     Proc2  25 26 27  |  0  0 28  | 29  0
3777            30  0  0  | 31 32 33  |  0 34
3778 .ve
3779 
3780    This can be represented as a collection of submatrices as:
3781 
3782 .vb
3783       A B C
3784       D E F
3785       G H I
3786 .ve
3787 
3788    Where the submatrices A,B,C are owned by proc0, D,E,F are
3789    owned by proc1, G,H,I are owned by proc2.
3790 
3791    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3792    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3793    The 'M','N' parameters are 8,8, and have the same values on all procs.
3794 
3795    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3796    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3797    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3798    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3799    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3800    matrix, and [DF] as another SeqAIJ matrix.
3801 
3802    When d_nz, o_nz parameters are specified, d_nz storage elements are
3803    allocated for every row of the local diagonal submatrix, and o_nz
3804    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3805    One way to choose d_nz and o_nz is to use the max nonzeros per local
3806    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3807    In this case, the values of d_nz,o_nz are:
3808 .vb
3809      proc0 : dnz = 2, o_nz = 2
3810      proc1 : dnz = 3, o_nz = 2
3811      proc2 : dnz = 1, o_nz = 4
3812 .ve
3813    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3814    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3815    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3816    34 values.
3817 
3818    When d_nnz, o_nnz parameters are specified, the storage is specified
3819    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3820    In the above case the values for d_nnz,o_nnz are:
3821 .vb
3822      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3823      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3824      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3825 .ve
3826    Here the space allocated is the sum of all the above values, i.e. 34, and
3827    hence the preallocation is perfect.
3828 
3829    Level: intermediate
3830 
3831 .keywords: matrix, aij, compressed row, sparse, parallel
3832 
3833 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3834           MPIAIJ, MatGetInfo()
3835 @*/
3836 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3837 {
3838   PetscErrorCode ierr;
3839 
3840   PetscFunctionBegin;
3841   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3842   PetscFunctionReturn(0);
3843 }
3844 
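/*
   Usage sketch for the 8x8 example in the manual page above, written for proc0
   (which owns rows 0-2); the d_nnz[]/o_nnz[] values are taken directly from that
   example and the variable names are illustrative.

      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/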
3845 #undef __FUNCT__
3846 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3847 /*@
3848      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3849          the local rows in standard CSR format.
3850 
3851    Collective on MPI_Comm
3852 
3853    Input Parameters:
3854 +  comm - MPI communicator
3855 .  m - number of local rows (Cannot be PETSC_DECIDE)
3856 .  n - This value should be the same as the local size used in creating the
3857        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3858        calculated if N is given) For square matrices n is almost always m.
3859 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3860 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3861 .   i - row indices
3862 .   j - column indices
3863 -   a - matrix values
3864 
3865    Output Parameter:
3866 .   mat - the matrix
3867 
3868    Level: intermediate
3869 
3870    Notes:
3871        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3872      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3873      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3874 
3875        The i and j indices are 0 based, and the values in i are offsets into the local j array.
3876 
3877        The format used for the sparse matrix input is equivalent to a
3878     row-major ordering, i.e. for the following matrix, the input data expected is
3879     as shown:
3880 
3881         1 0 0
3882         2 0 3     P0
3883        -------
3884         4 5 6     P1
3885 
3886      Process0 [P0]: rows_owned=[0,1]
3887         i =  {0,1,3}  [size = nrow+1  = 2+1]
3888         j =  {0,0,2}  [size = nz = 3]
3889         v =  {1,2,3}  [size = nz = 3]
3890 
3891      Process1 [P1]: rows_owned=[2]
3892         i =  {0,3}    [size = nrow+1  = 1+1]
3893         j =  {0,1,2}  [size = nz = 6]
3894         v =  {4,5,6}  [size = nz = 6]
3895 
3896 .keywords: matrix, aij, compressed row, sparse, parallel
3897 
3898 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3899           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3900 @*/
3901 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3902 {
3903   PetscErrorCode ierr;
3904 
3905   PetscFunctionBegin;
3906   if (i[0]) {
3907     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3908   }
3909   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3910   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3911   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3912   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3913   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3914   PetscFunctionReturn(0);
3915 }
3916 
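/*
   Usage sketch for the 3x3 example in the manual page above (nlocal is 2 on process 0
   and 1 on process 1; i, j and a are the per-process CSR arrays shown there):

      ierr = MatCreateMPIAIJWithArrays(comm,nlocal,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
*/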
3917 #undef __FUNCT__
3918 #define __FUNCT__ "MatCreateMPIAIJ"
3919 /*@C
3920    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3921    (the default parallel PETSc format).  For good matrix assembly performance
3922    the user should preallocate the matrix storage by setting the parameters
3923    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3924    performance can be increased by more than a factor of 50.
3925 
3926    Collective on MPI_Comm
3927 
3928    Input Parameters:
3929 +  comm - MPI communicator
3930 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3931            This value should be the same as the local size used in creating the
3932            y vector for the matrix-vector product y = Ax.
3933 .  n - This value should be the same as the local size used in creating the
3934        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3935        calculated if N is given) For square matrices n is almost always m.
3936 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3937 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3938 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3939            (same value is used for all local rows)
3940 .  d_nnz - array containing the number of nonzeros in the various rows of the
3941            DIAGONAL portion of the local submatrix (possibly different for each row)
3942            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3943            The size of this array is equal to the number of local rows, i.e 'm'.
3944            You must leave room for the diagonal entry even if it is zero.
3945 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3946            submatrix (same value is used for all local rows).
3947 -  o_nnz - array containing the number of nonzeros in the various rows of the
3948            OFF-DIAGONAL portion of the local submatrix (possibly different for
3949            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3950            structure. The size of this array is equal to the number
3951            of local rows, i.e 'm'.
3952 
3953    Output Parameter:
3954 .  A - the matrix
3955 
3956    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3957    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3958    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3959 
3960    Notes:
3961    If the *_nnz parameter is given then the *_nz parameter is ignored
3962 
3963    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3964    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3965    storage requirements for this matrix.
3966 
3967    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3968    processor then it must be used on all processors that share the object for
3969    that argument.
3970 
3971    The user MUST specify either the local or global matrix dimensions
3972    (possibly both).
3973 
3974    The parallel matrix is partitioned across processors such that the
3975    first m0 rows belong to process 0, the next m1 rows belong to
3976    process 1, the next m2 rows belong to process 2 etc., where
3977    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3978    values corresponding to an [m x N] submatrix.
3979 
3980    The columns are logically partitioned with the n0 columns belonging
3981    to 0th partition, the next n1 columns belonging to the next
3982    partition etc., where n0,n1,n2... are the input parameter 'n'.
3983 
3984    The DIAGONAL portion of the local submatrix on any given processor
3985    is the submatrix corresponding to the rows and columns m,n
3986    owned by the given processor, i.e. the diagonal matrix on
3987    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
3988    etc. The remaining portion of the local submatrix [m x (N-n)]
3989    constitute the OFF-DIAGONAL portion. The example below better
3990    illustrates this concept.
3991 
3992    For a square global matrix we define each processor's diagonal portion
3993    to be its local rows and the corresponding columns (a square submatrix);
3994    each processor's off-diagonal portion encompasses the remainder of the
3995    local matrix (a rectangular submatrix).
3996 
3997    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3998 
3999    When calling this routine with a single process communicator, a matrix of
4000    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4001    type of communicator, use the construction mechanism:
4002      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4003 
4004    By default, this format uses inodes (identical nodes) when possible.
4005    We search for consecutive rows with the same nonzero structure, thereby
4006    reusing matrix information to achieve increased efficiency.
4007 
4008    Options Database Keys:
4009 +  -mat_no_inode  - Do not use inodes
4010 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4011 -  -mat_aij_oneindex - Internally use indexing starting at 1
4012         rather than 0.  Note that when calling MatSetValues(),
4013         the user still MUST index entries starting at 0!
4014 
4015 
4016    Example usage:
4017 
4018    Consider the following 8x8 matrix with 34 non-zero values, which is
4019    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4020    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4021    as follows:
4022 
4023 .vb
4024             1  2  0  |  0  3  0  |  0  4
4025     Proc0   0  5  6  |  7  0  0  |  8  0
4026             9  0 10  | 11  0  0  | 12  0
4027     -------------------------------------
4028            13  0 14  | 15 16 17  |  0  0
4029     Proc1   0 18  0  | 19 20 21  |  0  0
4030             0  0  0  | 22 23  0  | 24  0
4031     -------------------------------------
4032     Proc2  25 26 27  |  0  0 28  | 29  0
4033            30  0  0  | 31 32 33  |  0 34
4034 .ve
4035 
4036    This can be represented as a collection of submatrices as:
4037 
4038 .vb
4039       A B C
4040       D E F
4041       G H I
4042 .ve
4043 
4044    Where the submatrices A,B,C are owned by proc0, D,E,F are
4045    owned by proc1, G,H,I are owned by proc2.
4046 
4047    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4048    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4049    The 'M','N' parameters are 8,8, and have the same values on all procs.
4050 
4051    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4052    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4053    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4054    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4055    part as separate SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4056    matrix, and [DF] as another SeqAIJ matrix.
4057 
4058    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4059    allocated for every row of the local DIAGONAL submatrix, and o_nz
4060    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4061    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4062    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4063    In this case, the values of d_nz,o_nz are:
4064 .vb
4065      proc0 : d_nz = 2, o_nz = 2
4066      proc1 : d_nz = 3, o_nz = 2
4067      proc2 : d_nz = 1, o_nz = 4
4068 .ve
4069    We are allocating m*(d_nz+o_nz) storage locations on every proc. This
4070    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4071    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4072    34 values.
4073 
4074    When d_nnz, o_nnz parameters are specified, the storage is specified
4075    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4076    In the above case the values for d_nnz,o_nnz are:
4077 .vb
4078      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4079      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4080      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4081 .ve
4082    Here the space allocated is the sum of all the above values, i.e., 34, and
4083    hence the preallocation is perfect.
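
   For example, a sketch of the corresponding call on proc1 of a 3-process
   PETSC_COMM_WORLD, using the per-row values above (error checking omitted):
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve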
4084 
4085    Level: intermediate
4086 
4087 .keywords: matrix, aij, compressed row, sparse, parallel
4088 
4089 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4090           MPIAIJ, MatCreateMPIAIJWithArrays()
4091 @*/
4092 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4093 {
4094   PetscErrorCode ierr;
4095   PetscMPIInt    size;
4096 
4097   PetscFunctionBegin;
4098   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4099   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4101   if (size > 1) {
4102     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4103     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4104   } else {
4105     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4106     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4107   }
4108   PetscFunctionReturn(0);
4109 }
4110 
4111 #undef __FUNCT__
4112 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4113 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4114 {
4115   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4116 
4117   PetscFunctionBegin;
4118   *Ad     = a->A;
4119   *Ao     = a->B;
4120   *colmap = a->garray;
4121   PetscFunctionReturn(0);
4122 }
4123 
4124 #undef __FUNCT__
4125 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4126 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4127 {
4128   PetscErrorCode ierr;
4129   PetscInt       i;
4130   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4131 
4132   PetscFunctionBegin;
4133   if (coloring->ctype == IS_COLORING_GLOBAL) {
4134     ISColoringValue *allcolors,*colors;
4135     ISColoring      ocoloring;
4136 
4137     /* set coloring for diagonal portion */
4138     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4139 
4140     /* set coloring for off-diagonal portion */
4141     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4142     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4143     for (i=0; i<a->B->cmap->n; i++) {
4144       colors[i] = allcolors[a->garray[i]];
4145     }
4146     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4147     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4148     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4149     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4150   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4151     ISColoringValue *colors;
4152     PetscInt        *larray;
4153     ISColoring      ocoloring;
4154 
4155     /* set coloring for diagonal portion */
4156     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4157     for (i=0; i<a->A->cmap->n; i++) {
4158       larray[i] = i + A->cmap->rstart;
4159     }
4160     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4161     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4162     for (i=0; i<a->A->cmap->n; i++) {
4163       colors[i] = coloring->colors[larray[i]];
4164     }
4165     ierr = PetscFree(larray);CHKERRQ(ierr);
4166     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4167     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4168     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4169 
4170     /* set coloring for off-diagonal portion */
4171     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4172     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4173     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4174     for (i=0; i<a->B->cmap->n; i++) {
4175       colors[i] = coloring->colors[larray[i]];
4176     }
4177     ierr = PetscFree(larray);CHKERRQ(ierr);
4178     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4179     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4180     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4181   } else {
4182     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4183   }
4184 
4185   PetscFunctionReturn(0);
4186 }
4187 
4188 #if defined(PETSC_HAVE_ADIC)
4189 #undef __FUNCT__
4190 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4191 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4192 {
4193   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4194   PetscErrorCode ierr;
4195 
4196   PetscFunctionBegin;
4197   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4198   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4199   PetscFunctionReturn(0);
4200 }
4201 #endif
4202 
4203 #undef __FUNCT__
4204 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4205 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4206 {
4207   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4208   PetscErrorCode ierr;
4209 
4210   PetscFunctionBegin;
4211   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4212   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4213   PetscFunctionReturn(0);
4214 }
4215 
4216 #undef __FUNCT__
4217 #define __FUNCT__ "MatMerge"
4218 /*@
4219       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
4220                  matrices from each processor
4221 
4222     Collective on MPI_Comm
4223 
4224    Input Parameters:
4225 +    comm - the communicator the parallel matrix will live on
4226 .    inmat - the input sequential matrix (one per process)
4227 .    n - number of local columns (or PETSC_DECIDE)
4228 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4229 
4230    Output Parameter:
4231 .    outmat - the parallel matrix generated
4232 
4233     Level: advanced
4234 
4235    Notes: The number of columns of the matrix on EACH processor MUST be the same.
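
   A minimal usage sketch (assuming localmat is an assembled SeqAIJ matrix on each
   process; note that this routine destroys the input matrix):
.vb
     Mat globalmat;
     MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);
.ve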
4236 
4237 @*/
4238 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4239 {
4240   PetscErrorCode ierr;
4241   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4242   PetscInt       *indx;
4243   PetscScalar    *values;
4244 
4245   PetscFunctionBegin;
4246   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4247   if (scall == MAT_INITIAL_MATRIX){
4248     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4249     if (n == PETSC_DECIDE){
4250       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4251     }
4252     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4253     rstart -= m;
4254 
4255     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4256     for (i=0;i<m;i++) {
4257       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4258       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4259       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4260     }
4261     /* This routine will ONLY return MPIAIJ type matrix */
4262     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4263     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4264     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4265     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4266     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4267 
4268   } else if (scall == MAT_REUSE_MATRIX){
4269     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4270   } else {
4271     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4272   }
4273 
4274   for (i=0;i<m;i++) {
4275     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4276     Ii    = i + rstart;
4277     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4278     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4279   }
4280   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4281   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4282   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4283 
4284   PetscFunctionReturn(0);
4285 }
4286 
4287 #undef __FUNCT__
4288 #define __FUNCT__ "MatFileSplit"
4289 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4290 {
4291   PetscErrorCode    ierr;
4292   PetscMPIInt       rank;
4293   PetscInt          m,N,i,rstart,nnz;
4294   size_t            len;
4295   const PetscInt    *indx;
4296   PetscViewer       out;
4297   char              *name;
4298   Mat               B;
4299   const PetscScalar *values;
4300 
4301   PetscFunctionBegin;
4302   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4303   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4304   /* Should this be the type of the diagonal block of A? */
4305   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4306   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4307   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4308   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4309   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4310   for (i=0;i<m;i++) {
4311     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4312     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4313     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4314   }
4315   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4316   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4317 
4318   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4319   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4320   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4321   sprintf(name,"%s.%d",outfile,rank);
4322   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4323   ierr = PetscFree(name);CHKERRQ(ierr);
4324   ierr = MatView(B,out);CHKERRQ(ierr);
4325   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4326   ierr = MatDestroy(B);CHKERRQ(ierr);
4327   PetscFunctionReturn(0);
4328 }
4329 
4330 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4331 #undef __FUNCT__
4332 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4333 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4334 {
4335   PetscErrorCode       ierr;
4336   Mat_Merge_SeqsToMPI  *merge = PETSC_NULL; /* remains NULL if no container is attached */
4337   PetscContainer       container;
4338 
4339   PetscFunctionBegin;
4340   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4341   if (container) {
4342     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4343     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4344     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4345     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4346     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4347     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4348     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4349     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4350     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4351     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4352     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4353     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4354     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4355     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4356 
4357     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4358     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4359   }
4360   ierr = PetscFree(merge);CHKERRQ(ierr);
4361 
4362   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4363   PetscFunctionReturn(0);
4364 }
4365 
4366 #include <../src/mat/utils/freespace.h>
4367 #include <petscbt.h>
4368 
4369 #undef __FUNCT__
4370 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4371 /*@C
4372       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4373                  matrices from each processor
4374 
4375     Collective on MPI_Comm
4376 
4377    Input Parameters:
4378 +    comm - the communicator the parallel matrix will live on
4379 .    seqmat - the input sequential matrix (one per process)
4380 .    m - number of local rows (or PETSC_DECIDE)
4381 .    n - number of local columns (or PETSC_DECIDE)
4382 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4383 
4384    Output Parameter:
4385 .    mpimat - the parallel matrix generated
4386 
4387     Level: advanced
4388 
4389    Notes:
4390      The dimensions of the sequential matrix on each processor MUST be the same.
4391      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4392      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
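
     A minimal sketch of the symbolic/numeric pair, as done by MatMerge_SeqsToMPI()
     (error checking omitted):
.vb
     Mat mpimat;
     MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
     MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve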
4393 @*/
4394 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4395 {
4396   PetscErrorCode       ierr;
4397   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4398   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4399   PetscMPIInt          size,rank,taga,*len_s;
4400   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4401   PetscInt             proc,m;
4402   PetscInt             **buf_ri,**buf_rj;
4403   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4404   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4405   MPI_Request          *s_waits,*r_waits;
4406   MPI_Status           *status;
4407   MatScalar            *aa=a->a;
4408   MatScalar            **abuf_r,*ba_i;
4409   Mat_Merge_SeqsToMPI  *merge;
4410   PetscContainer       container;
4411 
4412   PetscFunctionBegin;
4413   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4414 
4415   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4416   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4417 
4418   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4419   if (container) {
4420     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4421   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix does not possess a MatMergeSeqsToMPI container");
4422   bi     = merge->bi;
4423   bj     = merge->bj;
4424   buf_ri = merge->buf_ri;
4425   buf_rj = merge->buf_rj;
4426 
4427   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4428   owners = merge->rowmap->range;
4429   len_s  = merge->len_s;
4430 
4431   /* send and recv matrix values */
4432   /*-----------------------------*/
4433   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4434   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4435 
4436   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4437   for (proc=0,k=0; proc<size; proc++){
4438     if (!len_s[proc]) continue;
4439     i = owners[proc];
4440     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4441     k++;
4442   }
4443 
4444   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4445   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4446   ierr = PetscFree(status);CHKERRQ(ierr);
4447 
4448   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4449   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4450 
4451   /* insert mat values of mpimat */
4452   /*----------------------------*/
4453   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4454   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4455 
4456   for (k=0; k<merge->nrecv; k++){
4457     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4458     nrows = *(buf_ri_k[k]);
4459     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4460     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4461   }
4462 
4463   /* set values of ba */
4464   m = merge->rowmap->n;
4465   for (i=0; i<m; i++) {
4466     arow = owners[rank] + i;
4467     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4468     bnzi = bi[i+1] - bi[i];
4469     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4470 
4471     /* add local non-zero vals of this proc's seqmat into ba */
4472     anzi = ai[arow+1] - ai[arow];
4473     aj   = a->j + ai[arow];
4474     aa   = a->a + ai[arow];
4475     nextaj = 0;
4476     for (j=0; nextaj<anzi; j++){
4477       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4478         ba_i[j] += aa[nextaj++];
4479       }
4480     }
4481 
4482     /* add received vals into ba */
4483     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4484       /* i-th row */
4485       if (i == *nextrow[k]) {
4486         anzi = *(nextai[k]+1) - *nextai[k];
4487         aj   = buf_rj[k] + *(nextai[k]);
4488         aa   = abuf_r[k] + *(nextai[k]);
4489         nextaj = 0;
4490         for (j=0; nextaj<anzi; j++){
4491           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4492             ba_i[j] += aa[nextaj++];
4493           }
4494         }
4495         nextrow[k]++; nextai[k]++;
4496       }
4497     }
4498     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4499   }
4500   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4501   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4502 
4503   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4504   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4505   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4506   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4507   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4508   PetscFunctionReturn(0);
4509 }
4510 
4511 #undef __FUNCT__
4512 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4513 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4514 {
4515   PetscErrorCode       ierr;
4516   Mat                  B_mpi;
4517   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4518   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4519   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4520   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4521   PetscInt             len,proc,*dnz,*onz;
4522   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4523   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4524   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4525   MPI_Status           *status;
4526   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4527   PetscBT              lnkbt;
4528   Mat_Merge_SeqsToMPI  *merge;
4529   PetscContainer       container;
4530 
4531   PetscFunctionBegin;
4532   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4533 
4534   /* make sure it is a PETSc comm */
4535   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4536   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4537   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4538 
4539   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4540   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4541 
4542   /* determine row ownership */
4543   /*---------------------------------------------------------*/
4544   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4545   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4546   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4547   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4548   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4549   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4550   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4551 
4552   m      = merge->rowmap->n;
4553   M      = merge->rowmap->N;
4554   owners = merge->rowmap->range;
4555 
4556   /* determine the number of messages to send, their lengths */
4557   /*---------------------------------------------------------*/
4558   len_s  = merge->len_s;
4559 
4560   len = 0;  /* length of buf_si[] */
4561   merge->nsend = 0;
4562   for (proc=0; proc<size; proc++){
4563     len_si[proc] = 0;
4564     if (proc == rank){
4565       len_s[proc] = 0;
4566     } else {
4567       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4568       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4569     }
4570     if (len_s[proc]) {
4571       merge->nsend++;
4572       nrows = 0;
4573       for (i=owners[proc]; i<owners[proc+1]; i++){
4574         if (ai[i+1] > ai[i]) nrows++;
4575       }
4576       len_si[proc] = 2*(nrows+1);
4577       len += len_si[proc];
4578     }
4579   }
4580 
4581   /* determine the number and length of messages to receive for ij-structure */
4582   /*-------------------------------------------------------------------------*/
4583   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4584   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4585 
4586   /* post the Irecv of j-structure */
4587   /*-------------------------------*/
4588   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4589   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4590 
4591   /* post the Isend of j-structure */
4592   /*--------------------------------*/
4593   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4594 
4595   for (proc=0, k=0; proc<size; proc++){
4596     if (!len_s[proc]) continue;
4597     i = owners[proc];
4598     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4599     k++;
4600   }
4601 
4602   /* receives and sends of j-structure are complete */
4603   /*------------------------------------------------*/
4604   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4605   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4606 
4607   /* send and recv i-structure */
4608   /*---------------------------*/
4609   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4610   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4611 
4612   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4613   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4614   for (proc=0,k=0; proc<size; proc++){
4615     if (!len_s[proc]) continue;
4616     /* form outgoing message for i-structure:
4617          buf_si[0]:                 nrows to be sent
4618                [1:nrows]:           row index (global)
4619                [nrows+1:2*nrows+1]: i-structure index
4620     */
4621     /*-------------------------------------------*/
4622     nrows = len_si[proc]/2 - 1;
4623     buf_si_i    = buf_si + nrows+1;
4624     buf_si[0]   = nrows;
4625     buf_si_i[0] = 0;
4626     nrows = 0;
4627     for (i=owners[proc]; i<owners[proc+1]; i++){
4628       anzi = ai[i+1] - ai[i];
4629       if (anzi) {
4630         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4631         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4632         nrows++;
4633       }
4634     }
4635     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4636     k++;
4637     buf_si += len_si[proc];
4638   }
4639 
4640   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4641   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4642 
4643   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4644   for (i=0; i<merge->nrecv; i++){
4645     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4646   }
4647 
4648   ierr = PetscFree(len_si);CHKERRQ(ierr);
4649   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4650   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4651   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4652   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4653   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4654   ierr = PetscFree(status);CHKERRQ(ierr);
4655 
4656   /* compute a local seq matrix in each processor */
4657   /*----------------------------------------------*/
4658   /* allocate bi array and free space for accumulating nonzero column info */
4659   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4660   bi[0] = 0;
4661 
4662   /* create and initialize a linked list */
4663   nlnk = N+1;
4664   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4665 
4666   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4667   len = 0;
4668   len  = ai[owners[rank+1]] - ai[owners[rank]];
4669   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4670   current_space = free_space;
4671 
4672   /* determine symbolic info for each local row */
4673   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4674 
4675   for (k=0; k<merge->nrecv; k++){
4676     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4677     nrows = *buf_ri_k[k];
4678     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4679     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4680   }
4681 
4682   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4683   len = 0;
4684   for (i=0;i<m;i++) {
4685     bnzi   = 0;
4686     /* add local non-zero cols of this proc's seqmat into lnk */
4687     arow   = owners[rank] + i;
4688     anzi   = ai[arow+1] - ai[arow];
4689     aj     = a->j + ai[arow];
4690     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4691     bnzi += nlnk;
4692     /* add received col data into lnk */
4693     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4694       if (i == *nextrow[k]) { /* i-th row */
4695         anzi = *(nextai[k]+1) - *nextai[k];
4696         aj   = buf_rj[k] + *nextai[k];
4697         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4698         bnzi += nlnk;
4699         nextrow[k]++; nextai[k]++;
4700       }
4701     }
4702     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4703 
4704     /* if free space is not available, make more free space */
4705     if (current_space->local_remaining<bnzi) {
4706       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4707       nspacedouble++;
4708     }
4709     /* copy data into free space, then initialize lnk */
4710     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4711     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4712 
4713     current_space->array           += bnzi;
4714     current_space->local_used      += bnzi;
4715     current_space->local_remaining -= bnzi;
4716 
4717     bi[i+1] = bi[i] + bnzi;
4718   }
4719 
4720   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4721 
4722   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4723   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4724   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4725 
4726   /* create symbolic parallel matrix B_mpi */
4727   /*---------------------------------------*/
4728   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4729   if (n==PETSC_DECIDE) {
4730     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4731   } else {
4732     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4733   }
4734   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4735   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4736   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4737 
4738   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4739   B_mpi->assembled     = PETSC_FALSE;
4740   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4741   merge->bi            = bi;
4742   merge->bj            = bj;
4743   merge->buf_ri        = buf_ri;
4744   merge->buf_rj        = buf_rj;
4745   merge->coi           = PETSC_NULL;
4746   merge->coj           = PETSC_NULL;
4747   merge->owners_co     = PETSC_NULL;
4748 
4749   /* attach the supporting struct to B_mpi for reuse */
4750   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4751   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4752   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4753   *mpimat = B_mpi;
4754 
4755   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4756   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4757   PetscFunctionReturn(0);
4758 }
4759 
4760 #undef __FUNCT__
4761 #define __FUNCT__ "MatMerge_SeqsToMPI"
4762 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4763 {
4764   PetscErrorCode   ierr;
4765 
4766   PetscFunctionBegin;
4767   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4768   if (scall == MAT_INITIAL_MATRIX){
4769     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4770   }
4771   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4772   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4773   PetscFunctionReturn(0);
4774 }
4775 
4776 #undef __FUNCT__
4777 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4778 /*@
4779      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4780           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4781           with MatGetSize()
4782 
4783     Not Collective
4784 
4785    Input Parameters:
4786 +    A - the matrix
4787 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4788 
4789    Output Parameter:
4790 .    A_loc - the local sequential matrix generated
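
   A minimal usage sketch (assumes A is an assembled MPIAIJ matrix; error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatDestroy(A_loc);
.ve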
4791 
4792     Level: developer
4793 
4794 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4795 
4796 @*/
4797 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4798 {
4799   PetscErrorCode  ierr;
4800   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4801   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4802   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4803   MatScalar       *aa=a->a,*ba=b->a,*cam;
4804   PetscScalar     *ca;
4805   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4806   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4807   PetscBool       match;
4808 
4809   PetscFunctionBegin;
4810   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4811   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4812   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4813   if (scall == MAT_INITIAL_MATRIX){
4814     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4815     ci[0] = 0;
4816     for (i=0; i<am; i++){
4817       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4818     }
4819     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4820     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4821     k = 0;
4822     for (i=0; i<am; i++) {
4823       ncols_o = bi[i+1] - bi[i];
4824       ncols_d = ai[i+1] - ai[i];
4825       /* off-diagonal portion of A */
4826       for (jo=0; jo<ncols_o; jo++) {
4827         col = cmap[*bj];
4828         if (col >= cstart) break;
4829         cj[k]   = col; bj++;
4830         ca[k++] = *ba++;
4831       }
4832       /* diagonal portion of A */
4833       for (j=0; j<ncols_d; j++) {
4834         cj[k]   = cstart + *aj++;
4835         ca[k++] = *aa++;
4836       }
4837       /* off-diagonal portion of A */
4838       for (j=jo; j<ncols_o; j++) {
4839         cj[k]   = cmap[*bj++];
4840         ca[k++] = *ba++;
4841       }
4842     }
4843     /* put together the new matrix */
4844     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4845     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4846     /* Since these are PETSc arrays, change flags to free them as necessary. */
4847     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4848     mat->free_a  = PETSC_TRUE;
4849     mat->free_ij = PETSC_TRUE;
4850     mat->nonew   = 0;
4851   } else if (scall == MAT_REUSE_MATRIX){
4852     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4853     ci = mat->i; cj = mat->j; cam = mat->a;
4854     for (i=0; i<am; i++) {
4855       /* off-diagonal portion of A */
4856       ncols_o = bi[i+1] - bi[i];
4857       for (jo=0; jo<ncols_o; jo++) {
4858         col = cmap[*bj];
4859         if (col >= cstart) break;
4860         *cam++ = *ba++; bj++;
4861       }
4862       /* diagonal portion of A */
4863       ncols_d = ai[i+1] - ai[i];
4864       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4865       /* off-diagonal portion of A */
4866       for (j=jo; j<ncols_o; j++) {
4867         *cam++ = *ba++; bj++;
4868       }
4869     }
4870   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4871   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4872   PetscFunctionReturn(0);
4873 }
4874 
4875 #undef __FUNCT__
4876 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4877 /*@C
4878      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4879 
4880     Not Collective
4881 
4882    Input Parameters:
4883 +    A - the matrix
4884 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4885 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4886 
4887    Output Parameter:
4888 .    A_loc - the local sequential matrix generated
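
   A minimal usage sketch (assumes A is an assembled MPIAIJ matrix; passing PETSC_NULL for
   row and col selects all local rows and all nonzero columns; error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve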
4889 
4890     Level: developer
4891 
4892 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4893 
4894 @*/
4895 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4896 {
4897   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4898   PetscErrorCode    ierr;
4899   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4900   IS                isrowa,iscola;
4901   Mat               *aloc;
4902   PetscBool       match;
4903 
4904   PetscFunctionBegin;
4905   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4906   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4907   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4908   if (!row){
4909     start = A->rmap->rstart; end = A->rmap->rend;
4910     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4911   } else {
4912     isrowa = *row;
4913   }
4914   if (!col){
4915     start = A->cmap->rstart;
4916     cmap  = a->garray;
4917     nzA   = a->A->cmap->n;
4918     nzB   = a->B->cmap->n;
4919     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4920     ncols = 0;
4921     for (i=0; i<nzB; i++) {
4922       if (cmap[i] < start) idx[ncols++] = cmap[i];
4923       else break;
4924     }
4925     imark = i;
4926     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4927     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4928     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4929   } else {
4930     iscola = *col;
4931   }
4932   if (scall != MAT_INITIAL_MATRIX){
4933     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4934     aloc[0] = *A_loc;
4935   }
4936   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4937   *A_loc = aloc[0];
4938   ierr = PetscFree(aloc);CHKERRQ(ierr);
4939   if (!row){
4940     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4941   }
4942   if (!col){
4943     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4944   }
4945   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4946   PetscFunctionReturn(0);
4947 }
4948 
4949 #undef __FUNCT__
4950 #define __FUNCT__ "MatGetBrowsOfAcols"
4951 /*@C
4952     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4953 
4954     Collective on Mat
4955 
4956    Input Parameters:
4957 +    A,B - the matrices in mpiaij format
4958 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4959 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4960 
4961    Output Parameter:
4962 +    rowb, colb - index sets of rows and columns of B to extract
4963 .    brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
4964 -    B_seq - the sequential matrix generated
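
   A minimal usage sketch (MAT_INITIAL_MATRIX case; the created index sets are returned
   through rowb and colb so they can be reused with MAT_REUSE_MATRIX; error checking omitted):
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
.ve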
4965 
4966     Level: developer
4967 
4968 @*/
4969 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4970 {
4971   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4972   PetscErrorCode    ierr;
4973   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4974   IS                isrowb,iscolb;
4975   Mat               *bseq;
4976 
4977   PetscFunctionBegin;
4978   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4979     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4980   }
4981   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4982 
4983   if (scall == MAT_INITIAL_MATRIX){
4984     start = A->cmap->rstart;
4985     cmap  = a->garray;
4986     nzA   = a->A->cmap->n;
4987     nzB   = a->B->cmap->n;
4988     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4989     ncols = 0;
4990     for (i=0; i<nzB; i++) {  /* row < local row index */
4991       if (cmap[i] < start) idx[ncols++] = cmap[i];
4992       else break;
4993     }
4994     imark = i;
4995     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4996     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4997     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4998     *brstart = imark;
4999     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5000   } else {
5001     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5002     isrowb = *rowb; iscolb = *colb;
5003     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5004     bseq[0] = *B_seq;
5005   }
5006   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5007   *B_seq = bseq[0];
5008   ierr = PetscFree(bseq);CHKERRQ(ierr);
5009   if (!rowb){
5010     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
5011   } else {
5012     *rowb = isrowb;
5013   }
5014   if (!colb){
5015     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
5016   } else {
5017     *colb = iscolb;
5018   }
5019   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5020   PetscFunctionReturn(0);
5021 }
5022 
5023 #undef __FUNCT__
5024 #define __FUNCT__ "MatGetBrowsOfAoCols"
5025 /*@C
5026     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5027     of the OFF-DIAGONAL portion of local A
5028 
5029     Collective on Mat
5030 
5031    Input Parameters:
5032 +    A,B - the matrices in mpiaij format
5033 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5034 .    startsj - starting points in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5035 .    startsj_r - similar to startsj for receives
5036 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5037 
5038    Output Parameter:
5039 .    B_oth - the sequential matrix generated
5040 
5041     Level: developer
5042 
5043 @*/
5044 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5045 {
5046   VecScatter_MPI_General *gen_to,*gen_from;
5047   PetscErrorCode         ierr;
5048   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5049   Mat_SeqAIJ             *b_oth;
5050   VecScatter             ctx=a->Mvctx;
5051   MPI_Comm               comm=((PetscObject)ctx)->comm;
5052   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5053   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5054   PetscScalar            *rvalues,*svalues;
5055   MatScalar              *b_otha,*bufa,*bufA;
5056   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5057   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5058   MPI_Status             *sstatus,rstatus;
5059   PetscMPIInt            jj;
5060   PetscInt               *cols,sbs,rbs;
5061   PetscScalar            *vals;
5062 
5063   PetscFunctionBegin;
5064   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5065     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5066   }
5067   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5068   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5069 
5070   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5071   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5072   rvalues  = gen_from->values; /* holds the length of receiving row */
5073   svalues  = gen_to->values;   /* holds the length of sending row */
5074   nrecvs   = gen_from->n;
5075   nsends   = gen_to->n;
5076 
5077   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5078   srow     = gen_to->indices;   /* local row index to be sent */
5079   sstarts  = gen_to->starts;
5080   sprocs   = gen_to->procs;
5081   sstatus  = gen_to->sstatus;
5082   sbs      = gen_to->bs;
5083   rstarts  = gen_from->starts;
5084   rprocs   = gen_from->procs;
5085   rbs      = gen_from->bs;
5086 
5087   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5088   if (scall == MAT_INITIAL_MATRIX){
5089     /* i-array */
5090     /*---------*/
5091     /*  post receives */
5092     for (i=0; i<nrecvs; i++){
5093       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5094       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5095       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5096     }
5097 
5098     /* pack the outgoing message */
5099     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5100     sstartsj[0] = 0;  rstartsj[0] = 0;
5101     len = 0; /* total length of j or a array to be sent */
5102     k = 0;
5103     for (i=0; i<nsends; i++){
5104       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5105       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5106       for (j=0; j<nrows; j++) {
5107         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5108         for (l=0; l<sbs; l++){
5109           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5110           rowlen[j*sbs+l] = ncols;
5111           len += ncols;
5112           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5113         }
5114         k++;
5115       }
5116       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5117       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5118     }
5119     /* recvs and sends of i-array are completed */
5120     i = nrecvs;
5121     while (i--) {
5122       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5123     }
5124     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5125 
5126     /* allocate buffers for sending j and a arrays */
5127     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5128     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5129 
5130     /* create i-array of B_oth */
5131     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5132     b_othi[0] = 0;
5133     len = 0; /* total length of j or a array to be received */
5134     k = 0;
5135     for (i=0; i<nrecvs; i++){
5136       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5137       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5138       for (j=0; j<nrows; j++) {
5139         b_othi[k+1] = b_othi[k] + rowlen[j];
5140         len += rowlen[j]; k++;
5141       }
5142       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5143     }
5144 
5145     /* allocate space for the j and a arrays of B_oth */
5146     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5147     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5148 
5149     /* j-array */
5150     /*---------*/
5151     /*  post receives of j-array */
5152     for (i=0; i<nrecvs; i++){
5153       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5154       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5155     }
5156 
5157     /* pack the outgoing message j-array */
5158     k = 0;
5159     for (i=0; i<nsends; i++){
5160       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5161       bufJ = bufj+sstartsj[i];
5162       for (j=0; j<nrows; j++) {
5163         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5164         for (ll=0; ll<sbs; ll++){
5165           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5166           for (l=0; l<ncols; l++){
5167             *bufJ++ = cols[l];
5168           }
5169           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5170         }
5171       }
5172       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5173     }
5174 
5175     /* recvs and sends of j-array are completed */
5176     i = nrecvs;
5177     while (i--) {
5178       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5179     }
5180     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5181   } else if (scall == MAT_REUSE_MATRIX){
5182     sstartsj = *startsj;
5183     rstartsj = *startsj_r;
5184     bufa     = *bufa_ptr;
5185     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5186     b_otha   = b_oth->a;
5187   } else {
5188     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5189   }
5190 
5191   /* a-array */
5192   /*---------*/
5193   /*  post receives of a-array */
5194   for (i=0; i<nrecvs; i++){
5195     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5196     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5197   }
5198 
5199   /* pack the outgoing message a-array */
5200   k = 0;
5201   for (i=0; i<nsends; i++){
5202     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5203     bufA = bufa+sstartsj[i];
5204     for (j=0; j<nrows; j++) {
5205       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5206       for (ll=0; ll<sbs; ll++){
5207         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5208         for (l=0; l<ncols; l++){
5209           *bufA++ = vals[l];
5210         }
5211         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5212       }
5213     }
5214     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5215   }
5216   /* recvs and sends of a-array are completed */
5217   i = nrecvs;
5218   while (i--) {
5219     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5220   }
5221   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5222   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5223 
5224   if (scall == MAT_INITIAL_MATRIX){
5225     /* put together the new matrix */
5226     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5227 
5228     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5229     /* Since these are PETSc arrays, change flags to free them as necessary. */
5230     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5231     b_oth->free_a  = PETSC_TRUE;
5232     b_oth->free_ij = PETSC_TRUE;
5233     b_oth->nonew   = 0;
5234 
5235     ierr = PetscFree(bufj);CHKERRQ(ierr);
5236     if (!startsj || !bufa_ptr){
5237       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5238       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer since it is not returned to the caller */
5239     } else {
5240       *startsj   = sstartsj;
5241       *startsj_r = rstartsj;
5242       *bufa_ptr  = bufa;
5243     }
5244   }
5245   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5246   PetscFunctionReturn(0);
5247 }
5248 
5249 #undef __FUNCT__
5250 #define __FUNCT__ "MatGetCommunicationStructs"
5251 /*@C
5252   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5253 
5254   Not Collective
5255 
5256   Input Parameters:
5257 . A - The matrix in mpiaij format
5258 
5259   Output Parameter:
5260 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5261 . colmap - A map from global column index to local index into lvec
5262 - multScatter - A scatter from the argument of a matrix-vector product to lvec
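
  A minimal usage sketch (assumes a build without PETSC_USE_CTABLE, so colmap is a PetscInt
  array; error checking omitted):
.vb
    Vec        lvec;
    PetscInt   *colmap;
    VecScatter Mvctx;
    MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve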
5263 
5264   Level: developer
5265 
5266 @*/
5267 #if defined (PETSC_USE_CTABLE)
5268 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5269 #else
5270 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5271 #endif
5272 {
5273   Mat_MPIAIJ *a;
5274 
5275   PetscFunctionBegin;
5276   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5277   PetscValidPointer(lvec, 2);
5278   PetscValidPointer(colmap, 3);
5279   PetscValidPointer(multScatter, 4);
5280   a = (Mat_MPIAIJ *) A->data;
5281   if (lvec) *lvec = a->lvec;
5282   if (colmap) *colmap = a->colmap;
5283   if (multScatter) *multScatter = a->Mvctx;
5284   PetscFunctionReturn(0);
5285 }
5286 
5287 EXTERN_C_BEGIN
5288 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5289 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5290 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5291 EXTERN_C_END
5292 
5293 #undef __FUNCT__
5294 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5295 /*
5296     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable
5297 
5298                n                       p                          p
5299         (              )       (              )         (                  )
5300       m (      A       )  *  n (       B      )   =   m (         C        )
5301         (              )       (              )         (                  )
5302 
5303 */
5304 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5305 {
5306   PetscErrorCode     ierr;
5307   Mat                At,Bt,Ct;
5308 
5309   PetscFunctionBegin;
5310   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5311   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5312   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5313   ierr = MatDestroy(At);CHKERRQ(ierr);
5314   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5315   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5316   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5317   PetscFunctionReturn(0);
5318 }
5319 
5320 #undef __FUNCT__
5321 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5322 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5323 {
5324   PetscErrorCode ierr;
5325   PetscInt       m=A->rmap->n,n=B->cmap->n;
5326   Mat            Cmat;
5327 
5328   PetscFunctionBegin;
5329   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5330   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5331   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5332   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5333   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5334   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5335   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5336   *C   = Cmat;
5337   PetscFunctionReturn(0);
5338 }
5339 
5340 /* ----------------------------------------------------------------*/
5341 #undef __FUNCT__
5342 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5343 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5344 {
5345   PetscErrorCode ierr;
5346 
5347   PetscFunctionBegin;
5348   if (scall == MAT_INITIAL_MATRIX) {
5349     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5350   }
5351   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5352   PetscFunctionReturn(0);
5353 }
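
/*
   Usage sketch (illustrative): forming C = A*B where A is MPIDENSE and B is MPIAIJ, which
   dispatches to MatMatMult_MPIDense_MPIAIJ() above. The fill argument is not used since the
   result is dense; A and B are assumed assembled with A->cmap->n == B->rmap->n. The second
   call reuses the same C after the values of A or B have changed.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/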
5354 
5355 EXTERN_C_BEGIN
5356 #if defined(PETSC_HAVE_MUMPS)
5357 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5358 #endif
5359 #if defined(PETSC_HAVE_PASTIX)
5360 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5361 #endif
5362 #if defined(PETSC_HAVE_SUPERLU_DIST)
5363 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5364 #endif
5365 #if defined(PETSC_HAVE_SPOOLES)
5366 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5367 #endif
5368 EXTERN_C_END
5369 
5370 /*MC
5371    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5372 
5373    Options Database Keys:
5374 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5375 
5376   Level: beginner
5377 
5378 .seealso: MatCreateMPIAIJ()
5379 M*/
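
/*
   Usage sketch (illustrative): creating a matrix of type MATMPIAIJ explicitly; with
   MatSetFromOptions() the same type can instead be selected at run time via -mat_type mpiaij.
   The preallocation counts (5 nonzeros per row in the diagonal block, 2 in the off-diagonal
   block) are placeholders, not a recommendation.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/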
5380 
5381 EXTERN_C_BEGIN
5382 #undef __FUNCT__
5383 #define __FUNCT__ "MatCreate_MPIAIJ"
5384 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5385 {
5386   Mat_MPIAIJ     *b;
5387   PetscErrorCode ierr;
5388   PetscMPIInt    size;
5389 
5390   PetscFunctionBegin;
5391   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5392 
5393   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5394   B->data         = (void*)b;
5395   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5396   B->rmap->bs     = 1;
5397   B->assembled    = PETSC_FALSE;
5398 
5399   B->insertmode   = NOT_SET_VALUES;
5400   b->size         = size;
5401   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5402 
5403   /* build cache for off array entries formed */
5404   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5405   b->donotstash  = PETSC_FALSE;
5406   b->colmap      = 0;
5407   b->garray      = 0;
5408   b->roworiented = PETSC_TRUE;
5409 
5410   /* stuff used for matrix vector multiply */
5411   b->lvec      = PETSC_NULL;
5412   b->Mvctx     = PETSC_NULL;
5413 
5414   /* stuff for MatGetRow() */
5415   b->rowindices   = 0;
5416   b->rowvalues    = 0;
5417   b->getrowactive = PETSC_FALSE;
5418 
5419 #if defined(PETSC_HAVE_SPOOLES)
5420   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5421                                      "MatGetFactor_mpiaij_spooles",
5422                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5423 #endif
5424 #if defined(PETSC_HAVE_MUMPS)
5425   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5426                                      "MatGetFactor_aij_mumps",
5427                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5428 #endif
5429 #if defined(PETSC_HAVE_PASTIX)
5430   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5431 					   "MatGetFactor_mpiaij_pastix",
5432 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5433 #endif
5434 #if defined(PETSC_HAVE_SUPERLU_DIST)
5435   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5436                                      "MatGetFactor_mpiaij_superlu_dist",
5437                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5438 #endif
5439   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5440                                      "MatStoreValues_MPIAIJ",
5441                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5442   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5443                                      "MatRetrieveValues_MPIAIJ",
5444                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5445   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5446 				     "MatGetDiagonalBlock_MPIAIJ",
5447                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5448   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5449 				     "MatIsTranspose_MPIAIJ",
5450 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5451   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5452 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5453 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5454   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5455 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5456 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5457   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5458 				     "MatDiagonalScaleLocal_MPIAIJ",
5459 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5460   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5461                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5462                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5463   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5464                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5465                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5466   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5467                                      "MatConvert_MPIAIJ_MPISBAIJ",
5468                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5469   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5470                                      "MatMatMult_MPIDense_MPIAIJ",
5471                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5472   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5473                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5474                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5475   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5476                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5477                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5478   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5479   PetscFunctionReturn(0);
5480 }
5481 EXTERN_C_END
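
/*
   The PetscObjectComposeFunctionDynamic() calls in MatCreate_MPIAIJ() attach type-specific
   implementations to the object by name. Sketch (illustrative) of how such a composed function
   is later looked up and invoked, the mechanism behind routines such as
   MatMPIAIJSetPreallocation(); here B is the Mat and d_nz/d_nnz/o_nz/o_nnz stand for the
   caller's arguments.

     PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) {
       ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
     }
*/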
5482 
5483 #undef __FUNCT__
5484 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5485 /*@
5486      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5487          and "off-diagonal" part of the matrix in CSR format.
5488 
5489    Collective on MPI_Comm
5490 
5491    Input Parameters:
5492 +  comm - MPI communicator
5493 .  m - number of local rows (Cannot be PETSC_DECIDE)
5494 .  n - This value should be the same as the local size used in creating the
5495        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5496        it calculated if N is given). For square matrices n is almost always m.
5497 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5498 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5499 .   i - row indices for "diagonal" portion of matrix
5500 .   j - column indices
5501 .   a - matrix values
5502 .   oi - row indices for "off-diagonal" portion of matrix
5503 .   oj - column indices
5504 -   oa - matrix values
5505 
5506    Output Parameter:
5507 .   mat - the matrix
5508 
5509    Level: advanced
5510 
5511    Notes:
5512        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5513        must free the arrays once the matrix has been destroyed, and not before.
5514 
5515        The i, j, oi, and oj indices are 0-based
5516 
5517        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5518 
5519        This sets local rows and cannot be used to set off-processor values.
5520 
5521        You cannot later use MatSetValues() to change values in this matrix.
5522 
5523 .keywords: matrix, aij, compressed row, sparse, parallel
5524 
5525 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5526           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5527 @*/
5528 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5529 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5530 {
5531   PetscErrorCode ierr;
5532   Mat_MPIAIJ     *maij;
5533 
5534   PetscFunctionBegin;
5535   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5536   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5537   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5538   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5539   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5540   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5541   maij = (Mat_MPIAIJ*) (*mat)->data;
5542   maij->donotstash     = PETSC_TRUE;
5543   (*mat)->preallocated = PETSC_TRUE;
5544 
5545   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5546   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5547   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5548   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5549 
5550   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5551   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5552 
5553   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5554   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5555   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5556   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5557 
5558   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5559   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5560   PetscFunctionReturn(0);
5561 }
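
/*
   Usage sketch (illustrative): assembling the 2 by 2 matrix

        [  2  -1 ]
        [ -1   3 ]

   on two processes with one row per process using MatCreateMPIAIJWithSplitArrays(). Each
   process provides one entry in its diagonal block (local column index) and one entry in its
   off-diagonal block (global column index). The arrays must remain valid until A is destroyed.

     PetscMPIInt rank;
     PetscInt    i[2]  = {0,1},j[1]  = {0};
     PetscInt    oi[2] = {0,1},oj[1];
     PetscScalar a[1],oa[1] = {-1.0};
     Mat         A;

     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     a[0]  = rank ? 3.0 : 2.0;
     oj[0] = rank ? 0 : 1;
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/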
5562 
5563 /*
5564     Special version for direct calls from Fortran
5565 */
5566 #include <private/fortranimpl.h>
5567 
5568 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5569 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5570 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5571 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5572 #endif
5573 
5574 /* Change these macros so they can be used in a void function */
5575 #undef CHKERRQ
5576 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5577 #undef SETERRQ2
5578 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5579 #undef SETERRQ
5580 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5581 
5582 EXTERN_C_BEGIN
5583 #undef __FUNCT__
5584 #define __FUNCT__ "matsetvaluesmpiaij_"
5585 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5586 {
5587   Mat             mat = *mmat;
5588   PetscInt        m = *mm, n = *mn;
5589   InsertMode      addv = *maddv;
5590   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5591   PetscScalar     value;
5592   PetscErrorCode  ierr;
5593 
5594   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5595   if (mat->insertmode == NOT_SET_VALUES) {
5596     mat->insertmode = addv;
5597   }
5598 #if defined(PETSC_USE_DEBUG)
5599   else if (mat->insertmode != addv) {
5600     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5601   }
5602 #endif
5603   {
5604   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5605   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5606   PetscBool       roworiented = aij->roworiented;
5607 
5608   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5609   Mat             A = aij->A;
5610   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5611   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5612   MatScalar       *aa = a->a;
5613   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5614   Mat             B = aij->B;
5615   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5616   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5617   MatScalar       *ba = b->a;
5618 
5619   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5620   PetscInt        nonew = a->nonew;
5621   MatScalar       *ap1,*ap2;
5622 
5623   PetscFunctionBegin;
5624   for (i=0; i<m; i++) {
5625     if (im[i] < 0) continue;
5626 #if defined(PETSC_USE_DEBUG)
5627     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5628 #endif
5629     if (im[i] >= rstart && im[i] < rend) {
5630       row      = im[i] - rstart;
5631       lastcol1 = -1;
5632       rp1      = aj + ai[row];
5633       ap1      = aa + ai[row];
5634       rmax1    = aimax[row];
5635       nrow1    = ailen[row];
5636       low1     = 0;
5637       high1    = nrow1;
5638       lastcol2 = -1;
5639       rp2      = bj + bi[row];
5640       ap2      = ba + bi[row];
5641       rmax2    = bimax[row];
5642       nrow2    = bilen[row];
5643       low2     = 0;
5644       high2    = nrow2;
5645 
5646       for (j=0; j<n; j++) {
5647         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5648         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5649         if (in[j] >= cstart && in[j] < cend){
5650           col = in[j] - cstart;
5651           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5652         } else if (in[j] < 0) continue;
5653 #if defined(PETSC_USE_DEBUG)
5654         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5655 #endif
5656         else {
5657           if (mat->was_assembled) {
5658             if (!aij->colmap) {
5659               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5660             }
5661 #if defined (PETSC_USE_CTABLE)
5662             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5663 	    col--;
5664 #else
5665             col = aij->colmap[in[j]] - 1;
5666 #endif
5667             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5668               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5669               col =  in[j];
5670               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5671               B = aij->B;
5672               b = (Mat_SeqAIJ*)B->data;
5673               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5674               rp2      = bj + bi[row];
5675               ap2      = ba + bi[row];
5676               rmax2    = bimax[row];
5677               nrow2    = bilen[row];
5678               low2     = 0;
5679               high2    = nrow2;
5680               bm       = aij->B->rmap->n;
5681               ba = b->a;
5682             }
5683           } else col = in[j];
5684           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5685         }
5686       }
5687     } else {
5688       if (!aij->donotstash) {
5689         if (roworiented) {
5690           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5691         } else {
5692           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5693         }
5694       }
5695     }
5696   }}
5697   PetscFunctionReturnVoid();
5698 }
5699 EXTERN_C_END
5700 
5701