xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4aa3045d83914e00674d51ff6fb6b94ef17679f6)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine number diagonal and off-diagonal counts */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine number diagonal and off-diagonal counts */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0*/
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each processor
181 has an order N integer array but is fast to acess.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselfs, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal allocating
674      new memory, thus calling lots of mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added in yy until the next line, */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* values actually were received in the Begin() but we need to call this nop */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap->N;
932     header[2] = mat->cmap->N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap->n;
938     range = mat->rmap->range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap->n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap->n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* )th processor needs space a largest processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap->n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap->n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap->N > 1024) {
1114       PetscTruth flg = PETSC_FALSE;
1115 
1116       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1117       if (!flg) {
1118         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1119       }
1120     }
1121 
1122     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1123     if (!rank) {
1124       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1125     } else {
1126       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1127     }
1128     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1129     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1130     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1131     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1132 
1133     /* copy over the A part */
1134     Aloc = (Mat_SeqAIJ*)aij->A->data;
1135     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1136     row = mat->rmap->rstart;
1137     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1138     for (i=0; i<m; i++) {
1139       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1140       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1141     }
1142     aj = Aloc->j;
1143     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1144 
1145     /* copy over the B part */
1146     Aloc = (Mat_SeqAIJ*)aij->B->data;
1147     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1148     row  = mat->rmap->rstart;
1149     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1150     ct   = cols;
1151     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1155     }
1156     ierr = PetscFree(ct);CHKERRQ(ierr);
1157     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1158     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1159     /*
1160        Everyone has to call to draw the matrix since the graphics waits are
1161        synchronized across all processors that share the PetscDraw object
1162     */
1163     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1164     if (!rank) {
1165       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1166       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1167     }
1168     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1169     ierr = MatDestroy(A);CHKERRQ(ierr);
1170   }
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatView_MPIAIJ"
1176 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1177 {
1178   PetscErrorCode ierr;
1179   PetscTruth     iascii,isdraw,issocket,isbinary;
1180 
1181   PetscFunctionBegin;
1182   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1183   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1184   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1185   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1186   if (iascii || isdraw || isbinary || issocket) {
1187     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1188   } else {
1189     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatRelax_MPIAIJ"
1196 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1197 {
1198   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1199   PetscErrorCode ierr;
1200   Vec            bb1;
1201 
1202   PetscFunctionBegin;
1203   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1204 
1205   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1206     if (flag & SOR_ZERO_INITIAL_GUESS) {
1207       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1208       its--;
1209     }
1210 
1211     while (its--) {
1212       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214 
1215       /* update rhs: bb1 = bb - B*x */
1216       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1217       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1218 
1219       /* local sweep */
1220       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1221     }
1222   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1223     if (flag & SOR_ZERO_INITIAL_GUESS) {
1224       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1225       its--;
1226     }
1227     while (its--) {
1228       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230 
1231       /* update rhs: bb1 = bb - B*x */
1232       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1233       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1234 
1235       /* local sweep */
1236       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1237     }
1238   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1239     if (flag & SOR_ZERO_INITIAL_GUESS) {
1240       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1241       its--;
1242     }
1243     while (its--) {
1244       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246 
1247       /* update rhs: bb1 = bb - B*x */
1248       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1249       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1250 
1251       /* local sweep */
1252       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1253     }
1254   } else {
1255     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1256   }
1257 
1258   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatPermute_MPIAIJ"
1264 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1265 {
1266   MPI_Comm       comm,pcomm;
1267   PetscInt       first,local_size,nrows;
1268   const PetscInt *rows;
1269   PetscMPIInt    size;
1270   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1275   /* make a collective version of 'rowp' */
1276   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1277   if (pcomm==comm) {
1278     crowp = rowp;
1279   } else {
1280     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1281     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1282     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1283     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1284   }
1285   /* collect the global row permutation and invert it */
1286   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1287   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1288   if (pcomm!=comm) {
1289     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1290   }
1291   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1292   /* get the local target indices */
1293   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1294   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1295   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1296   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1297   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1298   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1299   /* the column permutation is so much easier;
1300      make a local version of 'colp' and invert it */
1301   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1302   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1303   if (size==1) {
1304     lcolp = colp;
1305   } else {
1306     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1307     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1308     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1309   }
1310   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1311   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1312   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1313   if (size>1) {
1314     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1315     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1316   }
1317   /* now we just get the submatrix */
1318   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1319   /* clean up */
1320   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1321   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1327 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1328 {
1329   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1330   Mat            A = mat->A,B = mat->B;
1331   PetscErrorCode ierr;
1332   PetscReal      isend[5],irecv[5];
1333 
1334   PetscFunctionBegin;
1335   info->block_size     = 1.0;
1336   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1337   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1338   isend[3] = info->memory;  isend[4] = info->mallocs;
1339   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1340   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1341   isend[3] += info->memory;  isend[4] += info->mallocs;
1342   if (flag == MAT_LOCAL) {
1343     info->nz_used      = isend[0];
1344     info->nz_allocated = isend[1];
1345     info->nz_unneeded  = isend[2];
1346     info->memory       = isend[3];
1347     info->mallocs      = isend[4];
1348   } else if (flag == MAT_GLOBAL_MAX) {
1349     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1350     info->nz_used      = irecv[0];
1351     info->nz_allocated = irecv[1];
1352     info->nz_unneeded  = irecv[2];
1353     info->memory       = irecv[3];
1354     info->mallocs      = irecv[4];
1355   } else if (flag == MAT_GLOBAL_SUM) {
1356     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1357     info->nz_used      = irecv[0];
1358     info->nz_allocated = irecv[1];
1359     info->nz_unneeded  = irecv[2];
1360     info->memory       = irecv[3];
1361     info->mallocs      = irecv[4];
1362   }
1363   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1364   info->fill_ratio_needed = 0;
1365   info->factor_mallocs    = 0;
1366 
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatSetOption_MPIAIJ"
1372 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1373 {
1374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   switch (op) {
1379   case MAT_NEW_NONZERO_LOCATIONS:
1380   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1381   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1382   case MAT_KEEP_ZEROED_ROWS:
1383   case MAT_NEW_NONZERO_LOCATION_ERR:
1384   case MAT_USE_INODES:
1385   case MAT_IGNORE_ZERO_ENTRIES:
1386     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1387     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1388     break;
1389   case MAT_ROW_ORIENTED:
1390     a->roworiented = flg;
1391     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1392     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1393     break;
1394   case MAT_NEW_DIAGONALS:
1395     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1396     break;
1397   case MAT_IGNORE_OFF_PROC_ENTRIES:
1398     a->donotstash = PETSC_TRUE;
1399     break;
1400   case MAT_SYMMETRIC:
1401     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1402     break;
1403   case MAT_STRUCTURALLY_SYMMETRIC:
1404   case MAT_HERMITIAN:
1405   case MAT_SYMMETRY_ETERNAL:
1406     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1407     break;
1408   default:
1409     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1410   }
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatGetRow_MPIAIJ"
1416 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1417 {
1418   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1419   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1420   PetscErrorCode ierr;
1421   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1422   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1423   PetscInt       *cmap,*idx_p;
1424 
1425   PetscFunctionBegin;
1426   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1427   mat->getrowactive = PETSC_TRUE;
1428 
1429   if (!mat->rowvalues && (idx || v)) {
1430     /*
1431         allocate enough space to hold information from the longest row.
1432     */
1433     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1434     PetscInt     max = 1,tmp;
1435     for (i=0; i<matin->rmap->n; i++) {
1436       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1437       if (max < tmp) { max = tmp; }
1438     }
1439     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1440     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1441   }
1442 
1443   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1444   lrow = row - rstart;
1445 
1446   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1447   if (!v)   {pvA = 0; pvB = 0;}
1448   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1449   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1450   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1451   nztot = nzA + nzB;
1452 
1453   cmap  = mat->garray;
1454   if (v  || idx) {
1455     if (nztot) {
1456       /* Sort by increasing column numbers, assuming A and B already sorted */
1457       PetscInt imark = -1;
1458       if (v) {
1459         *v = v_p = mat->rowvalues;
1460         for (i=0; i<nzB; i++) {
1461           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1462           else break;
1463         }
1464         imark = i;
1465         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1466         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1467       }
1468       if (idx) {
1469         *idx = idx_p = mat->rowindices;
1470         if (imark > -1) {
1471           for (i=0; i<imark; i++) {
1472             idx_p[i] = cmap[cworkB[i]];
1473           }
1474         } else {
1475           for (i=0; i<nzB; i++) {
1476             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1477             else break;
1478           }
1479           imark = i;
1480         }
1481         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1482         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1483       }
1484     } else {
1485       if (idx) *idx = 0;
1486       if (v)   *v   = 0;
1487     }
1488   }
1489   *nz = nztot;
1490   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1491   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1497 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498 {
1499   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1500 
1501   PetscFunctionBegin;
1502   if (!aij->getrowactive) {
1503     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1504   }
1505   aij->getrowactive = PETSC_FALSE;
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatNorm_MPIAIJ"
1511 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1512 {
1513   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1514   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1515   PetscErrorCode ierr;
1516   PetscInt       i,j,cstart = mat->cmap->rstart;
1517   PetscReal      sum = 0.0;
1518   MatScalar      *v;
1519 
1520   PetscFunctionBegin;
1521   if (aij->size == 1) {
1522     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1523   } else {
1524     if (type == NORM_FROBENIUS) {
1525       v = amat->a;
1526       for (i=0; i<amat->nz; i++) {
1527 #if defined(PETSC_USE_COMPLEX)
1528         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1529 #else
1530         sum += (*v)*(*v); v++;
1531 #endif
1532       }
1533       v = bmat->a;
1534       for (i=0; i<bmat->nz; i++) {
1535 #if defined(PETSC_USE_COMPLEX)
1536         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1537 #else
1538         sum += (*v)*(*v); v++;
1539 #endif
1540       }
1541       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1542       *norm = sqrt(*norm);
1543     } else if (type == NORM_1) { /* max column norm */
1544       PetscReal *tmp,*tmp2;
1545       PetscInt  *jj,*garray = aij->garray;
1546       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1547       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1548       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1549       *norm = 0.0;
1550       v = amat->a; jj = amat->j;
1551       for (j=0; j<amat->nz; j++) {
1552         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1553       }
1554       v = bmat->a; jj = bmat->j;
1555       for (j=0; j<bmat->nz; j++) {
1556         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1557       }
1558       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1559       for (j=0; j<mat->cmap->N; j++) {
1560         if (tmp2[j] > *norm) *norm = tmp2[j];
1561       }
1562       ierr = PetscFree(tmp);CHKERRQ(ierr);
1563       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1564     } else if (type == NORM_INFINITY) { /* max row norm */
1565       PetscReal ntemp = 0.0;
1566       for (j=0; j<aij->A->rmap->n; j++) {
1567         v = amat->a + amat->i[j];
1568         sum = 0.0;
1569         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1570           sum += PetscAbsScalar(*v); v++;
1571         }
1572         v = bmat->a + bmat->i[j];
1573         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1574           sum += PetscAbsScalar(*v); v++;
1575         }
1576         if (sum > ntemp) ntemp = sum;
1577       }
1578       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1579     } else {
1580       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1581     }
1582   }
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatTranspose_MPIAIJ"
1588 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1589 {
1590   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1591   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1592   PetscErrorCode ierr;
1593   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1594   PetscInt       cstart=A->cmap->rstart,ncol;
1595   Mat            B;
1596   MatScalar      *array;
1597 
1598   PetscFunctionBegin;
1599   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1600 
1601   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1602   ai = Aloc->i; aj = Aloc->j;
1603   bi = Bloc->i; bj = Bloc->j;
1604   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1605     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1606     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1607     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1608     for (i=0; i<ai[ma]; i++){
1609       d_nnz[aj[i]] ++;
1610       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1611     }
1612 
1613     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1614     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1615     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1616     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1617     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1618   } else {
1619     B = *matout;
1620   }
1621 
1622   /* copy over the A part */
1623   array = Aloc->a;
1624   row = A->rmap->rstart;
1625   for (i=0; i<ma; i++) {
1626     ncol = ai[i+1]-ai[i];
1627     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1628     row++; array += ncol; aj += ncol;
1629   }
1630   aj = Aloc->j;
1631   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1632 
1633   /* copy over the B part */
1634   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1635   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1636   array = Bloc->a;
1637   row = A->rmap->rstart;
1638   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1639   cols_tmp = cols;
1640   for (i=0; i<mb; i++) {
1641     ncol = bi[i+1]-bi[i];
1642     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1643     row++; array += ncol; cols_tmp += ncol;
1644   }
1645   ierr = PetscFree(cols);CHKERRQ(ierr);
1646 
1647   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1648   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1649   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1650     *matout = B;
1651   } else {
1652     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1653   }
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1659 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1660 {
1661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1662   Mat            a = aij->A,b = aij->B;
1663   PetscErrorCode ierr;
1664   PetscInt       s1,s2,s3;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1668   if (rr) {
1669     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1670     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1671     /* Overlap communication with computation. */
1672     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673   }
1674   if (ll) {
1675     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1676     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1677     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1678   }
1679   /* scale  the diagonal block */
1680   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1681 
1682   if (rr) {
1683     /* Do a scatter end and then right scale the off-diagonal block */
1684     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1685     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1686   }
1687 
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1693 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1694 {
1695   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1696   PetscErrorCode ierr;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1700   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1705 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1706 {
1707   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1708   PetscErrorCode ierr;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatEqual_MPIAIJ"
1717 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1718 {
1719   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1720   Mat            a,b,c,d;
1721   PetscTruth     flg;
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   a = matA->A; b = matA->B;
1726   c = matB->A; d = matB->B;
1727 
1728   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1729   if (flg) {
1730     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1731   }
1732   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatCopy_MPIAIJ"
1738 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1739 {
1740   PetscErrorCode ierr;
1741   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1742   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1743 
1744   PetscFunctionBegin;
1745   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1746   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1747     /* because of the column compression in the off-processor part of the matrix a->B,
1748        the number of columns in a->B and b->B may be different, hence we cannot call
1749        the MatCopy() directly on the two parts. If need be, we can provide a more
1750        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1751        then copying the submatrices */
1752     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1753   } else {
1754     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1755     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1762 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1763 {
1764   PetscErrorCode ierr;
1765 
1766   PetscFunctionBegin;
1767   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #include "petscblaslapack.h"
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatAXPY_MPIAIJ"
1774 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1775 {
1776   PetscErrorCode ierr;
1777   PetscInt       i;
1778   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1779   PetscBLASInt   bnz,one=1;
1780   Mat_SeqAIJ     *x,*y;
1781 
1782   PetscFunctionBegin;
1783   if (str == SAME_NONZERO_PATTERN) {
1784     PetscScalar alpha = a;
1785     x = (Mat_SeqAIJ *)xx->A->data;
1786     y = (Mat_SeqAIJ *)yy->A->data;
1787     bnz = PetscBLASIntCast(x->nz);
1788     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1789     x = (Mat_SeqAIJ *)xx->B->data;
1790     y = (Mat_SeqAIJ *)yy->B->data;
1791     bnz = PetscBLASIntCast(x->nz);
1792     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1793   } else if (str == SUBSET_NONZERO_PATTERN) {
1794     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1795 
1796     x = (Mat_SeqAIJ *)xx->B->data;
1797     y = (Mat_SeqAIJ *)yy->B->data;
1798     if (y->xtoy && y->XtoY != xx->B) {
1799       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1800       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1801     }
1802     if (!y->xtoy) { /* get xtoy */
1803       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1804       y->XtoY = xx->B;
1805       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1806     }
1807     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1808   } else {
1809     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatConjugate_MPIAIJ"
1818 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1819 {
1820 #if defined(PETSC_USE_COMPLEX)
1821   PetscErrorCode ierr;
1822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1823 
1824   PetscFunctionBegin;
1825   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1826   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1827 #else
1828   PetscFunctionBegin;
1829 #endif
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatRealPart_MPIAIJ"
1835 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1836 {
1837   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1842   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1848 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1849 {
1850   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1851   PetscErrorCode ierr;
1852 
1853   PetscFunctionBegin;
1854   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1855   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #ifdef PETSC_HAVE_PBGL
1860 
1861 #include <boost/parallel/mpi/bsp_process_group.hpp>
1862 #include <boost/graph/distributed/ilu_default_graph.hpp>
1863 #include <boost/graph/distributed/ilu_0_block.hpp>
1864 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1865 #include <boost/graph/distributed/petsc/interface.hpp>
1866 #include <boost/multi_array.hpp>
1867 #include <boost/parallel/distributed_property_map->hpp>
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1871 /*
1872   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1873 */
1874 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1875 {
1876   namespace petsc = boost::distributed::petsc;
1877 
1878   namespace graph_dist = boost::graph::distributed;
1879   using boost::graph::distributed::ilu_default::process_group_type;
1880   using boost::graph::ilu_permuted;
1881 
1882   PetscTruth      row_identity, col_identity;
1883   PetscContainer  c;
1884   PetscInt        m, n, M, N;
1885   PetscErrorCode  ierr;
1886 
1887   PetscFunctionBegin;
1888   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1889   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1890   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1891   if (!row_identity || !col_identity) {
1892     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1893   }
1894 
1895   process_group_type pg;
1896   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1897   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1898   lgraph_type&   level_graph = *lgraph_p;
1899   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1900 
1901   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1902   ilu_permuted(level_graph);
1903 
1904   /* put together the new matrix */
1905   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1906   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1907   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1908   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1909   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1910   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1911   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1912 
1913   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1914   ierr = PetscContainerSetPointer(c, lgraph_p);
1915   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1921 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1922 {
1923   PetscFunctionBegin;
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatSolve_MPIAIJ"
1929 /*
1930   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1931 */
1932 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1933 {
1934   namespace graph_dist = boost::graph::distributed;
1935 
1936   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1937   lgraph_type*   lgraph_p;
1938   PetscContainer c;
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1943   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1944   ierr = VecCopy(b, x);CHKERRQ(ierr);
1945 
1946   PetscScalar* array_x;
1947   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1948   PetscInt sx;
1949   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1950 
1951   PetscScalar* array_b;
1952   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1953   PetscInt sb;
1954   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1955 
1956   lgraph_type&   level_graph = *lgraph_p;
1957   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1958 
1959   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1960   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1961                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1962 
1963   typedef boost::iterator_property_map<array_ref_type::iterator,
1964                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1965   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1966                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1967 
1968   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1969 
1970   PetscFunctionReturn(0);
1971 }
1972 #endif
1973 
1974 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1975   PetscInt       nzlocal,nsends,nrecvs;
1976   PetscMPIInt    *send_rank;
1977   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1978   PetscScalar    *sbuf_a,**rbuf_a;
1979   PetscErrorCode (*MatDestroy)(Mat);
1980 } Mat_Redundant;
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1984 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1985 {
1986   PetscErrorCode       ierr;
1987   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1988   PetscInt             i;
1989 
1990   PetscFunctionBegin;
1991   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1992   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1993   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1994   for (i=0; i<redund->nrecvs; i++){
1995     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1996     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1997   }
1998   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1999   ierr = PetscFree(redund);CHKERRQ(ierr);
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatDestroy_MatRedundant"
2005 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2006 {
2007   PetscErrorCode  ierr;
2008   PetscContainer  container;
2009   Mat_Redundant   *redund=PETSC_NULL;
2010 
2011   PetscFunctionBegin;
2012   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2013   if (container) {
2014     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2015   } else {
2016     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2017   }
2018   A->ops->destroy = redund->MatDestroy;
2019   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2020   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2021   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2027 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2028 {
2029   PetscMPIInt    rank,size;
2030   MPI_Comm       comm=((PetscObject)mat)->comm;
2031   PetscErrorCode ierr;
2032   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2033   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2034   PetscInt       *rowrange=mat->rmap->range;
2035   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2036   Mat            A=aij->A,B=aij->B,C=*matredundant;
2037   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2038   PetscScalar    *sbuf_a;
2039   PetscInt       nzlocal=a->nz+b->nz;
2040   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2041   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2042   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2043   MatScalar      *aworkA,*aworkB;
2044   PetscScalar    *vals;
2045   PetscMPIInt    tag1,tag2,tag3,imdex;
2046   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2047                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2048   MPI_Status     recv_status,*send_status;
2049   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2050   PetscInt       **rbuf_j=PETSC_NULL;
2051   PetscScalar    **rbuf_a=PETSC_NULL;
2052   Mat_Redundant  *redund=PETSC_NULL;
2053   PetscContainer container;
2054 
2055   PetscFunctionBegin;
2056   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2057   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2058 
2059   if (reuse == MAT_REUSE_MATRIX) {
2060     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2061     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2062     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2063     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2064     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2065     if (container) {
2066       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2067     } else {
2068       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2069     }
2070     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2071 
2072     nsends    = redund->nsends;
2073     nrecvs    = redund->nrecvs;
2074     send_rank = redund->send_rank; recv_rank = send_rank + size;
2075     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2076     sbuf_j    = redund->sbuf_j;
2077     sbuf_a    = redund->sbuf_a;
2078     rbuf_j    = redund->rbuf_j;
2079     rbuf_a    = redund->rbuf_a;
2080   }
2081 
2082   if (reuse == MAT_INITIAL_MATRIX){
2083     PetscMPIInt  subrank,subsize;
2084     PetscInt     nleftover,np_subcomm;
2085     /* get the destination processors' id send_rank, nsends and nrecvs */
2086     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2087     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2088     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
2089     recv_rank = send_rank + size;
2090     np_subcomm = size/nsubcomm;
2091     nleftover  = size - nsubcomm*np_subcomm;
2092     nsends = 0; nrecvs = 0;
2093     for (i=0; i<size; i++){ /* i=rank*/
2094       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2095         send_rank[nsends] = i; nsends++;
2096         recv_rank[nrecvs++] = i;
2097       }
2098     }
2099     if (rank >= size - nleftover){/* this proc is a leftover processor */
2100       i = size-nleftover-1;
2101       j = 0;
2102       while (j < nsubcomm - nleftover){
2103         send_rank[nsends++] = i;
2104         i--; j++;
2105       }
2106     }
2107 
2108     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2109       for (i=0; i<nleftover; i++){
2110         recv_rank[nrecvs++] = size-nleftover+i;
2111       }
2112     }
2113 
2114     /* allocate sbuf_j, sbuf_a */
2115     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2116     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2117     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2118   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2119 
2120   /* copy mat's local entries into the buffers */
2121   if (reuse == MAT_INITIAL_MATRIX){
2122     rownz_max = 0;
2123     rptr = sbuf_j;
2124     cols = sbuf_j + rend-rstart + 1;
2125     vals = sbuf_a;
2126     rptr[0] = 0;
2127     for (i=0; i<rend-rstart; i++){
2128       row = i + rstart;
2129       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2130       ncols  = nzA + nzB;
2131       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2132       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2133       /* load the column indices for this row into cols */
2134       lwrite = 0;
2135       for (l=0; l<nzB; l++) {
2136         if ((ctmp = bmap[cworkB[l]]) < cstart){
2137           vals[lwrite]   = aworkB[l];
2138           cols[lwrite++] = ctmp;
2139         }
2140       }
2141       for (l=0; l<nzA; l++){
2142         vals[lwrite]   = aworkA[l];
2143         cols[lwrite++] = cstart + cworkA[l];
2144       }
2145       for (l=0; l<nzB; l++) {
2146         if ((ctmp = bmap[cworkB[l]]) >= cend){
2147           vals[lwrite]   = aworkB[l];
2148           cols[lwrite++] = ctmp;
2149         }
2150       }
2151       vals += ncols;
2152       cols += ncols;
2153       rptr[i+1] = rptr[i] + ncols;
2154       if (rownz_max < ncols) rownz_max = ncols;
2155     }
2156     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2157   } else { /* only copy matrix values into sbuf_a */
2158     rptr = sbuf_j;
2159     vals = sbuf_a;
2160     rptr[0] = 0;
2161     for (i=0; i<rend-rstart; i++){
2162       row = i + rstart;
2163       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2164       ncols  = nzA + nzB;
2165       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2166       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2167       lwrite = 0;
2168       for (l=0; l<nzB; l++) {
2169         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2170       }
2171       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2172       for (l=0; l<nzB; l++) {
2173         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2174       }
2175       vals += ncols;
2176       rptr[i+1] = rptr[i] + ncols;
2177     }
2178   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2179 
2180   /* send nzlocal to others, and recv other's nzlocal */
2181   /*--------------------------------------------------*/
2182   if (reuse == MAT_INITIAL_MATRIX){
2183     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2184     s_waits2 = s_waits3 + nsends;
2185     s_waits1 = s_waits2 + nsends;
2186     r_waits1 = s_waits1 + nsends;
2187     r_waits2 = r_waits1 + nrecvs;
2188     r_waits3 = r_waits2 + nrecvs;
2189   } else {
2190     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2191     r_waits3 = s_waits3 + nsends;
2192   }
2193 
2194   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2195   if (reuse == MAT_INITIAL_MATRIX){
2196     /* get new tags to keep the communication clean */
2197     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2198     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2199     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2200     rbuf_nz = sbuf_nz + nsends;
2201 
2202     /* post receives of other's nzlocal */
2203     for (i=0; i<nrecvs; i++){
2204       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2205     }
2206     /* send nzlocal to others */
2207     for (i=0; i<nsends; i++){
2208       sbuf_nz[i] = nzlocal;
2209       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2210     }
2211     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2212     count = nrecvs;
2213     while (count) {
2214       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2215       recv_rank[imdex] = recv_status.MPI_SOURCE;
2216       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2217       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2218 
2219       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2220       rbuf_nz[imdex] += i + 2;
2221       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2222       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2223       count--;
2224     }
2225     /* wait on sends of nzlocal */
2226     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2227     /* send mat->i,j to others, and recv from other's */
2228     /*------------------------------------------------*/
2229     for (i=0; i<nsends; i++){
2230       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2231       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2232     }
2233     /* wait on receives of mat->i,j */
2234     /*------------------------------*/
2235     count = nrecvs;
2236     while (count) {
2237       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2238       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2239       count--;
2240     }
2241     /* wait on sends of mat->i,j */
2242     /*---------------------------*/
2243     if (nsends) {
2244       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2245     }
2246   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2247 
2248   /* post receives, send and receive mat->a */
2249   /*----------------------------------------*/
2250   for (imdex=0; imdex<nrecvs; imdex++) {
2251     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2252   }
2253   for (i=0; i<nsends; i++){
2254     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2255   }
2256   count = nrecvs;
2257   while (count) {
2258     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2259     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2260     count--;
2261   }
2262   if (nsends) {
2263     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2264   }
2265 
2266   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2267 
2268   /* create redundant matrix */
2269   /*-------------------------*/
2270   if (reuse == MAT_INITIAL_MATRIX){
2271     /* compute rownz_max for preallocation */
2272     for (imdex=0; imdex<nrecvs; imdex++){
2273       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2274       rptr = rbuf_j[imdex];
2275       for (i=0; i<j; i++){
2276         ncols = rptr[i+1] - rptr[i];
2277         if (rownz_max < ncols) rownz_max = ncols;
2278       }
2279     }
2280 
2281     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2282     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2283     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2284     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2285     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2286   } else {
2287     C = *matredundant;
2288   }
2289 
2290   /* insert local matrix entries */
2291   rptr = sbuf_j;
2292   cols = sbuf_j + rend-rstart + 1;
2293   vals = sbuf_a;
2294   for (i=0; i<rend-rstart; i++){
2295     row   = i + rstart;
2296     ncols = rptr[i+1] - rptr[i];
2297     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2298     vals += ncols;
2299     cols += ncols;
2300   }
2301   /* insert received matrix entries */
2302   for (imdex=0; imdex<nrecvs; imdex++){
2303     rstart = rowrange[recv_rank[imdex]];
2304     rend   = rowrange[recv_rank[imdex]+1];
2305     rptr = rbuf_j[imdex];
2306     cols = rbuf_j[imdex] + rend-rstart + 1;
2307     vals = rbuf_a[imdex];
2308     for (i=0; i<rend-rstart; i++){
2309       row   = i + rstart;
2310       ncols = rptr[i+1] - rptr[i];
2311       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2312       vals += ncols;
2313       cols += ncols;
2314     }
2315   }
2316   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2317   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2318   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2319   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2320   if (reuse == MAT_INITIAL_MATRIX){
2321     PetscContainer container;
2322     *matredundant = C;
2323     /* create a supporting struct and attach it to C for reuse */
2324     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2325     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2326     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2327     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2328     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2329 
2330     redund->nzlocal = nzlocal;
2331     redund->nsends  = nsends;
2332     redund->nrecvs  = nrecvs;
2333     redund->send_rank = send_rank;
2334     redund->sbuf_nz = sbuf_nz;
2335     redund->sbuf_j  = sbuf_j;
2336     redund->sbuf_a  = sbuf_a;
2337     redund->rbuf_j  = rbuf_j;
2338     redund->rbuf_a  = rbuf_a;
2339 
2340     redund->MatDestroy = C->ops->destroy;
2341     C->ops->destroy    = MatDestroy_MatRedundant;
2342   }
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2349 {
2350   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2351   PetscErrorCode ierr;
2352   PetscInt       i,*idxb = 0;
2353   PetscScalar    *va,*vb;
2354   Vec            vtmp;
2355 
2356   PetscFunctionBegin;
2357   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2358   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2359   if (idx) {
2360     for (i=0; i<A->rmap->n; i++) {
2361       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2362     }
2363   }
2364 
2365   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2366   if (idx) {
2367     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2368   }
2369   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2370   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2371 
2372   for (i=0; i<A->rmap->n; i++){
2373     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2374       va[i] = vb[i];
2375       if (idx) idx[i] = a->garray[idxb[i]];
2376     }
2377   }
2378 
2379   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2380   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2381   if (idxb) {
2382     ierr = PetscFree(idxb);CHKERRQ(ierr);
2383   }
2384   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2391 {
2392   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2393   PetscErrorCode ierr;
2394   PetscInt       i,*idxb = 0;
2395   PetscScalar    *va,*vb;
2396   Vec            vtmp;
2397 
2398   PetscFunctionBegin;
2399   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2400   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2401   if (idx) {
2402     for (i=0; i<A->cmap->n; i++) {
2403       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2404     }
2405   }
2406 
2407   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2408   if (idx) {
2409     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2410   }
2411   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2412   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2413 
2414   for (i=0; i<A->rmap->n; i++){
2415     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2416       va[i] = vb[i];
2417       if (idx) idx[i] = a->garray[idxb[i]];
2418     }
2419   }
2420 
2421   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2423   if (idxb) {
2424     ierr = PetscFree(idxb);CHKERRQ(ierr);
2425   }
2426   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2432 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2433 {
2434   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2435   PetscInt       n      = A->rmap->n;
2436   PetscInt       cstart = A->cmap->rstart;
2437   PetscInt      *cmap   = mat->garray;
2438   PetscInt      *diagIdx, *offdiagIdx;
2439   Vec            diagV, offdiagV;
2440   PetscScalar   *a, *diagA, *offdiagA;
2441   PetscInt       r;
2442   PetscErrorCode ierr;
2443 
2444   PetscFunctionBegin;
2445   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2446   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2447   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2448   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2449   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2450   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2451   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2452   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2453   for(r = 0; r < n; ++r) {
2454     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2455       a[r]   = diagA[r];
2456       idx[r] = cstart + diagIdx[r];
2457     } else {
2458       a[r]   = offdiagA[r];
2459       idx[r] = cmap[offdiagIdx[r]];
2460     }
2461   }
2462   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2464   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2465   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2466   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2467   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2473 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2474 {
2475   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2476   PetscInt       n      = A->rmap->n;
2477   PetscInt       cstart = A->cmap->rstart;
2478   PetscInt      *cmap   = mat->garray;
2479   PetscInt      *diagIdx, *offdiagIdx;
2480   Vec            diagV, offdiagV;
2481   PetscScalar   *a, *diagA, *offdiagA;
2482   PetscInt       r;
2483   PetscErrorCode ierr;
2484 
2485   PetscFunctionBegin;
2486   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2487   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2488   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2489   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2490   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2491   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2492   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2493   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2494   for(r = 0; r < n; ++r) {
2495     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2496       a[r]   = diagA[r];
2497       idx[r] = cstart + diagIdx[r];
2498     } else {
2499       a[r]   = offdiagA[r];
2500       idx[r] = cmap[offdiagIdx[r]];
2501     }
2502   }
2503   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2504   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2505   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2506   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2507   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2508   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2514 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2515 {
2516   PetscErrorCode ierr;
2517 
2518   PetscFunctionBegin;
2519   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 /* -------------------------------------------------------------------*/
2524 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2525        MatGetRow_MPIAIJ,
2526        MatRestoreRow_MPIAIJ,
2527        MatMult_MPIAIJ,
2528 /* 4*/ MatMultAdd_MPIAIJ,
2529        MatMultTranspose_MPIAIJ,
2530        MatMultTransposeAdd_MPIAIJ,
2531 #ifdef PETSC_HAVE_PBGL
2532        MatSolve_MPIAIJ,
2533 #else
2534        0,
2535 #endif
2536        0,
2537        0,
2538 /*10*/ 0,
2539        0,
2540        0,
2541        MatRelax_MPIAIJ,
2542        MatTranspose_MPIAIJ,
2543 /*15*/ MatGetInfo_MPIAIJ,
2544        MatEqual_MPIAIJ,
2545        MatGetDiagonal_MPIAIJ,
2546        MatDiagonalScale_MPIAIJ,
2547        MatNorm_MPIAIJ,
2548 /*20*/ MatAssemblyBegin_MPIAIJ,
2549        MatAssemblyEnd_MPIAIJ,
2550        MatSetOption_MPIAIJ,
2551        MatZeroEntries_MPIAIJ,
2552 /*24*/ MatZeroRows_MPIAIJ,
2553        0,
2554 #ifdef PETSC_HAVE_PBGL
2555        0,
2556 #else
2557        0,
2558 #endif
2559        0,
2560        0,
2561 /*29*/ MatSetUpPreallocation_MPIAIJ,
2562 #ifdef PETSC_HAVE_PBGL
2563        0,
2564 #else
2565        0,
2566 #endif
2567        0,
2568        0,
2569        0,
2570 /*34*/ MatDuplicate_MPIAIJ,
2571        0,
2572        0,
2573        0,
2574        0,
2575 /*39*/ MatAXPY_MPIAIJ,
2576        MatGetSubMatrices_MPIAIJ,
2577        MatIncreaseOverlap_MPIAIJ,
2578        MatGetValues_MPIAIJ,
2579        MatCopy_MPIAIJ,
2580 /*44*/ MatGetRowMax_MPIAIJ,
2581        MatScale_MPIAIJ,
2582        0,
2583        0,
2584        0,
2585 /*49*/ MatSetBlockSize_MPIAIJ,
2586        0,
2587        0,
2588        0,
2589        0,
2590 /*54*/ MatFDColoringCreate_MPIAIJ,
2591        0,
2592        MatSetUnfactored_MPIAIJ,
2593        MatPermute_MPIAIJ,
2594        0,
2595 /*59*/ MatGetSubMatrix_MPIAIJ,
2596        MatDestroy_MPIAIJ,
2597        MatView_MPIAIJ,
2598        0,
2599        0,
2600 /*64*/ 0,
2601        0,
2602        0,
2603        0,
2604        0,
2605 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2606        MatGetRowMinAbs_MPIAIJ,
2607        0,
2608        MatSetColoring_MPIAIJ,
2609 #if defined(PETSC_HAVE_ADIC)
2610        MatSetValuesAdic_MPIAIJ,
2611 #else
2612        0,
2613 #endif
2614        MatSetValuesAdifor_MPIAIJ,
2615 /*75*/ 0,
2616        0,
2617        0,
2618        0,
2619        0,
2620 /*80*/ 0,
2621        0,
2622        0,
2623 /*83*/ MatLoad_MPIAIJ,
2624        0,
2625        0,
2626        0,
2627        0,
2628        0,
2629 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2630        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2631        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2632        MatPtAP_Basic,
2633        MatPtAPSymbolic_MPIAIJ,
2634 /*94*/ MatPtAPNumeric_MPIAIJ,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*99*/ 0,
2640        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2641        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2642        MatConjugate_MPIAIJ,
2643        0,
2644 /*104*/MatSetValuesRow_MPIAIJ,
2645        MatRealPart_MPIAIJ,
2646        MatImaginaryPart_MPIAIJ,
2647        0,
2648        0,
2649 /*109*/0,
2650        MatGetRedundantMatrix_MPIAIJ,
2651        MatGetRowMin_MPIAIJ,
2652        0,
2653        0,
2654 /*114*/MatGetSeqNonzerostructure_MPIAIJ};
2655 
2656 /* ----------------------------------------------------------------------------------------*/
2657 
2658 EXTERN_C_BEGIN
2659 #undef __FUNCT__
2660 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2661 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2662 {
2663   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2664   PetscErrorCode ierr;
2665 
2666   PetscFunctionBegin;
2667   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2668   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2669   PetscFunctionReturn(0);
2670 }
2671 EXTERN_C_END
2672 
2673 EXTERN_C_BEGIN
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2676 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2677 {
2678   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2679   PetscErrorCode ierr;
2680 
2681   PetscFunctionBegin;
2682   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2683   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2684   PetscFunctionReturn(0);
2685 }
2686 EXTERN_C_END
2687 
2688 #include "petscpc.h"
2689 EXTERN_C_BEGIN
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2692 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2693 {
2694   Mat_MPIAIJ     *b;
2695   PetscErrorCode ierr;
2696   PetscInt       i;
2697 
2698   PetscFunctionBegin;
2699   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2700   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2701   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2702   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2703 
2704   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2705   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2706   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2707   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2708   if (d_nnz) {
2709     for (i=0; i<B->rmap->n; i++) {
2710       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2711     }
2712   }
2713   if (o_nnz) {
2714     for (i=0; i<B->rmap->n; i++) {
2715       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2716     }
2717   }
2718   b = (Mat_MPIAIJ*)B->data;
2719 
2720   if (!B->preallocated) {
2721     /* Explicitly create 2 MATSEQAIJ matrices. */
2722     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2723     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2724     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2725     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2726     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2727     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2728     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2729     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2730   }
2731 
2732   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2733   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2734   B->preallocated = PETSC_TRUE;
2735   PetscFunctionReturn(0);
2736 }
2737 EXTERN_C_END
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2741 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2742 {
2743   Mat            mat;
2744   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   *newmat       = 0;
2749   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2750   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2751   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2752   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2753   a    = (Mat_MPIAIJ*)mat->data;
2754 
2755   mat->factor       = matin->factor;
2756   mat->rmap->bs      = matin->rmap->bs;
2757   mat->assembled    = PETSC_TRUE;
2758   mat->insertmode   = NOT_SET_VALUES;
2759   mat->preallocated = PETSC_TRUE;
2760 
2761   a->size           = oldmat->size;
2762   a->rank           = oldmat->rank;
2763   a->donotstash     = oldmat->donotstash;
2764   a->roworiented    = oldmat->roworiented;
2765   a->rowindices     = 0;
2766   a->rowvalues      = 0;
2767   a->getrowactive   = PETSC_FALSE;
2768 
2769   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2770   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2771 
2772   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2773   if (oldmat->colmap) {
2774 #if defined (PETSC_USE_CTABLE)
2775     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2776 #else
2777     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2778     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2779     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2780 #endif
2781   } else a->colmap = 0;
2782   if (oldmat->garray) {
2783     PetscInt len;
2784     len  = oldmat->B->cmap->n;
2785     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2786     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2787     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2788   } else a->garray = 0;
2789 
2790   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2791   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2792   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2793   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2794   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2795   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2796   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2797   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2798   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2799   *newmat = mat;
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #include "petscsys.h"
2804 
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatLoad_MPIAIJ"
2807 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2808 {
2809   Mat            A;
2810   PetscScalar    *vals,*svals;
2811   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2812   MPI_Status     status;
2813   PetscErrorCode ierr;
2814   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2815   PetscInt       i,nz,j,rstart,rend,mmax,maxnz;
2816   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2817   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2818   PetscInt       cend,cstart,n,*rowners;
2819   int            fd;
2820 
2821   PetscFunctionBegin;
2822   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2823   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2824   if (!rank) {
2825     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2826     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2827     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2828   }
2829 
2830   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2831   M = header[1]; N = header[2];
2832   /* determine ownership of all rows */
2833   m    = M/size + ((M % size) > rank);
2834   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2835   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2836 
2837   /* First process needs enough room for process with most rows */
2838   if (!rank) {
2839     mmax       = rowners[1];
2840     for (i=2; i<size; i++) {
2841       mmax = PetscMax(mmax,rowners[i]);
2842     }
2843   } else mmax = m;
2844 
2845   rowners[0] = 0;
2846   for (i=2; i<=size; i++) {
2847     rowners[i] += rowners[i-1];
2848   }
2849   rstart = rowners[rank];
2850   rend   = rowners[rank+1];
2851 
2852   /* distribute row lengths to all processors */
2853   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2854   if (!rank) {
2855     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2856     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2857     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2858     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2859     for (j=0; j<m; j++) {
2860       procsnz[0] += ourlens[j];
2861     }
2862     for (i=1; i<size; i++) {
2863       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2864       /* calculate the number of nonzeros on each processor */
2865       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2866         procsnz[i] += rowlengths[j];
2867       }
2868       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2869       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2870     }
2871     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2872   } else {
2873     mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr);
2874     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2875   }
2876 
2877   if (!rank) {
2878     /* determine max buffer needed and allocate it */
2879     maxnz = 0;
2880     for (i=0; i<size; i++) {
2881       maxnz = PetscMax(maxnz,procsnz[i]);
2882     }
2883     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2884 
2885     /* read in my part of the matrix column indices  */
2886     nz   = procsnz[0];
2887     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2888     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2889 
2890     /* read in every one elses and ship off */
2891     for (i=1; i<size; i++) {
2892       nz     = procsnz[i];
2893       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2894       mpicnt = PetscMPIIntCast(nz);
2895       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2896     }
2897     ierr = PetscFree(cols);CHKERRQ(ierr);
2898   } else {
2899     /* determine buffer space needed for message */
2900     nz = 0;
2901     for (i=0; i<m; i++) {
2902       nz += ourlens[i];
2903     }
2904     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2905 
2906     /* receive message of column indices*/
2907     mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr);
2908     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2909     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2910     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2911     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2912     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2913   }
2914 
2915   /* determine column ownership if matrix is not square */
2916   if (N != M) {
2917     n      = N/size + ((N % size) > rank);
2918     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2919     cstart = cend - n;
2920   } else {
2921     cstart = rstart;
2922     cend   = rend;
2923     n      = cend - cstart;
2924   }
2925 
2926   /* loop over local rows, determining number of off diagonal entries */
2927   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2928   jj = 0;
2929   for (i=0; i<m; i++) {
2930     for (j=0; j<ourlens[i]; j++) {
2931       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2932       jj++;
2933     }
2934   }
2935 
2936   /* create our matrix */
2937   for (i=0; i<m; i++) {
2938     ourlens[i] -= offlens[i];
2939   }
2940   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2941   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2942   ierr = MatSetType(A,type);CHKERRQ(ierr);
2943   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2944 
2945   for (i=0; i<m; i++) {
2946     ourlens[i] += offlens[i];
2947   }
2948 
2949   if (!rank) {
2950     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2951 
2952     /* read in my part of the matrix numerical values  */
2953     nz   = procsnz[0];
2954     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2955 
2956     /* insert into matrix */
2957     jj      = rstart;
2958     smycols = mycols;
2959     svals   = vals;
2960     for (i=0; i<m; i++) {
2961       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2962       smycols += ourlens[i];
2963       svals   += ourlens[i];
2964       jj++;
2965     }
2966 
2967     /* read in other processors and ship out */
2968     for (i=1; i<size; i++) {
2969       nz     = procsnz[i];
2970       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2971       mpicnt = PetscMPIIntCast(nz);
2972       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2973     }
2974     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2975   } else {
2976     /* receive numeric values */
2977     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2978 
2979     /* receive message of values*/
2980     mpicnt = PetscMPIIntCast(nz);
2981     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2982     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
2983     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2984     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2985     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2986 
2987     /* insert into matrix */
2988     jj      = rstart;
2989     smycols = mycols;
2990     svals   = vals;
2991     for (i=0; i<m; i++) {
2992       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2993       smycols += ourlens[i];
2994       svals   += ourlens[i];
2995       jj++;
2996     }
2997   }
2998   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2999   ierr = PetscFree(vals);CHKERRQ(ierr);
3000   ierr = PetscFree(mycols);CHKERRQ(ierr);
3001   ierr = PetscFree(rowners);CHKERRQ(ierr);
3002 
3003   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3004   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3005   *newmat = A;
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3011 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3012 {
3013   PetscErrorCode ierr;
3014   IS             iscol_local;
3015   PetscInt       csize;
3016 
3017   PetscFunctionBegin;
3018   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3019   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3020   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3021   ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3022   PetscFunctionReturn(0);
3023 }
3024 
3025 #undef __FUNCT__
3026 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3027 /*
3028     Not great since it makes two copies of the submatrix, first an SeqAIJ
3029   in local and then by concatenating the local matrices the end result.
3030   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3031 
3032   Note: This requires a sequential iscol with all indices.
3033 */
3034 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3035 {
3036   PetscErrorCode ierr;
3037   PetscMPIInt    rank,size;
3038   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3039   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3040   Mat            *local,M,Mreuse;
3041   MatScalar      *vwork,*aa;
3042   MPI_Comm       comm = ((PetscObject)mat)->comm;
3043   Mat_SeqAIJ     *aij;
3044 
3045 
3046   PetscFunctionBegin;
3047   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3048   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3049 
3050   if (call ==  MAT_REUSE_MATRIX) {
3051     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3052     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3053     local = &Mreuse;
3054     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3055   } else {
3056     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3057     Mreuse = *local;
3058     ierr   = PetscFree(local);CHKERRQ(ierr);
3059   }
3060 
3061   /*
3062       m - number of local rows
3063       n - number of columns (same on all processors)
3064       rstart - first row in new global matrix generated
3065   */
3066   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3067   if (call == MAT_INITIAL_MATRIX) {
3068     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3069     ii  = aij->i;
3070     jj  = aij->j;
3071 
3072     /*
3073         Determine the number of non-zeros in the diagonal and off-diagonal
3074         portions of the matrix in order to do correct preallocation
3075     */
3076 
3077     /* first get start and end of "diagonal" columns */
3078     if (csize == PETSC_DECIDE) {
3079       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3080       if (mglobal == n) { /* square matrix */
3081 	nlocal = m;
3082       } else {
3083         nlocal = n/size + ((n % size) > rank);
3084       }
3085     } else {
3086       nlocal = csize;
3087     }
3088     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3089     rstart = rend - nlocal;
3090     if (rank == size - 1 && rend != n) {
3091       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3092     }
3093 
3094     /* next, compute all the lengths */
3095     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3096     olens = dlens + m;
3097     for (i=0; i<m; i++) {
3098       jend = ii[i+1] - ii[i];
3099       olen = 0;
3100       dlen = 0;
3101       for (j=0; j<jend; j++) {
3102         if (*jj < rstart || *jj >= rend) olen++;
3103         else dlen++;
3104         jj++;
3105       }
3106       olens[i] = olen;
3107       dlens[i] = dlen;
3108     }
3109     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3110     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3111     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3112     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3113     ierr = PetscFree(dlens);CHKERRQ(ierr);
3114   } else {
3115     PetscInt ml,nl;
3116 
3117     M = *newmat;
3118     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3119     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3120     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3121     /*
3122          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3123        rather than the slower MatSetValues().
3124     */
3125     M->was_assembled = PETSC_TRUE;
3126     M->assembled     = PETSC_FALSE;
3127   }
3128   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3129   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3130   ii  = aij->i;
3131   jj  = aij->j;
3132   aa  = aij->a;
3133   for (i=0; i<m; i++) {
3134     row   = rstart + i;
3135     nz    = ii[i+1] - ii[i];
3136     cwork = jj;     jj += nz;
3137     vwork = aa;     aa += nz;
3138     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3139   }
3140 
3141   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3142   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3143   *newmat = M;
3144 
3145   /* save submatrix used in processor for next request */
3146   if (call ==  MAT_INITIAL_MATRIX) {
3147     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3148     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3149   }
3150 
3151   PetscFunctionReturn(0);
3152 }
3153 
3154 EXTERN_C_BEGIN
3155 #undef __FUNCT__
3156 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3157 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3158 {
3159   PetscInt       m,cstart, cend,j,nnz,i,d;
3160   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3161   const PetscInt *JJ;
3162   PetscScalar    *values;
3163   PetscErrorCode ierr;
3164 
3165   PetscFunctionBegin;
3166   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3167 
3168   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3169   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3170   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3171   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3172   m      = B->rmap->n;
3173   cstart = B->cmap->rstart;
3174   cend   = B->cmap->rend;
3175   rstart = B->rmap->rstart;
3176 
3177   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3178   o_nnz = d_nnz + m;
3179 
3180 #if defined(PETSC_USE_DEBUGGING)
3181   for (i=0; i<m; i++) {
3182     nnz     = Ii[i+1]- Ii[i];
3183     JJ      = J + Ii[i];
3184     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3185     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3186     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3187     for (j=1; j<nnz; j++) {
3188       if (JJ[i] <= JJ[i-1]) SETERRRQ(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
3189     }
3190   }
3191 #endif
3192 
3193   for (i=0; i<m; i++) {
3194     nnz     = Ii[i+1]- Ii[i];
3195     JJ      = J + Ii[i];
3196     nnz_max = PetscMax(nnz_max,nnz);
3197     for (j=0; j<nnz; j++) {
3198       if (*JJ >= cstart) break;
3199       JJ++;
3200     }
3201     d = 0;
3202     for (; j<nnz; j++) {
3203       if (*JJ++ >= cend) break;
3204       d++;
3205     }
3206     d_nnz[i] = d;
3207     o_nnz[i] = nnz - d;
3208   }
3209   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3210   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3211 
3212   if (v) values = (PetscScalar*)v;
3213   else {
3214     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3215     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3216   }
3217 
3218   for (i=0; i<m; i++) {
3219     ii   = i + rstart;
3220     nnz  = Ii[i+1]- Ii[i];
3221     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3222   }
3223   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3224   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3225 
3226   if (!v) {
3227     ierr = PetscFree(values);CHKERRQ(ierr);
3228   }
3229   PetscFunctionReturn(0);
3230 }
3231 EXTERN_C_END
3232 
3233 #undef __FUNCT__
3234 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3235 /*@
3236    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3237    (the default parallel PETSc format).
3238 
3239    Collective on MPI_Comm
3240 
3241    Input Parameters:
3242 +  B - the matrix
3243 .  i - the indices into j for the start of each local row (starts with zero)
3244 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3245 -  v - optional values in the matrix
3246 
3247    Level: developer
3248 
3249    Notes:
3250        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3251      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3252      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3253 
3254        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3255 
3256        The format which is used for the sparse matrix input, is equivalent to a
3257     row-major ordering.. i.e for the following matrix, the input data expected is
3258     as shown:
3259 
3260         1 0 0
3261         2 0 3     P0
3262        -------
3263         4 5 6     P1
3264 
3265      Process0 [P0]: rows_owned=[0,1]
3266         i =  {0,1,3}  [size = nrow+1  = 2+1]
3267         j =  {0,0,2}  [size = nz = 6]
3268         v =  {1,2,3}  [size = nz = 6]
3269 
3270      Process1 [P1]: rows_owned=[2]
3271         i =  {0,3}    [size = nrow+1  = 1+1]
3272         j =  {0,1,2}  [size = nz = 6]
3273         v =  {4,5,6}  [size = nz = 6]
3274 
3275       The column indices for each row MUST be sorted.
3276 
3277 .keywords: matrix, aij, compressed row, sparse, parallel
3278 
3279 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3280           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3281 @*/
3282 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3283 {
3284   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3285 
3286   PetscFunctionBegin;
3287   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3288   if (f) {
3289     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3290   }
3291   PetscFunctionReturn(0);
3292 }
3293 
3294 #undef __FUNCT__
3295 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3296 /*@C
3297    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3298    (the default parallel PETSc format).  For good matrix assembly performance
3299    the user should preallocate the matrix storage by setting the parameters
3300    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3301    performance can be increased by more than a factor of 50.
3302 
3303    Collective on MPI_Comm
3304 
3305    Input Parameters:
3306 +  A - the matrix
3307 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3308            (same value is used for all local rows)
3309 .  d_nnz - array containing the number of nonzeros in the various rows of the
3310            DIAGONAL portion of the local submatrix (possibly different for each row)
3311            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3312            The size of this array is equal to the number of local rows, i.e 'm'.
3313            You must leave room for the diagonal entry even if it is zero.
3314 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3315            submatrix (same value is used for all local rows).
3316 -  o_nnz - array containing the number of nonzeros in the various rows of the
3317            OFF-DIAGONAL portion of the local submatrix (possibly different for
3318            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3319            structure. The size of this array is equal to the number
3320            of local rows, i.e 'm'.
3321 
3322    If the *_nnz parameter is given then the *_nz parameter is ignored
3323 
3324    The AIJ format (also called the Yale sparse matrix format or
3325    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3326    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3327 
3328    The parallel matrix is partitioned such that the first m0 rows belong to
3329    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3330    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3331 
3332    The DIAGONAL portion of the local submatrix of a processor can be defined
3333    as the submatrix which is obtained by extraction the part corresponding
3334    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3335    first row that belongs to the processor, and r2 is the last row belonging
3336    to the this processor. This is a square mxm matrix. The remaining portion
3337    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3338 
3339    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3340 
3341    You can call MatGetInfo() to get information on how effective the preallocation was;
3342    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3343    You can also run with the option -info and look for messages with the string
3344    malloc in them to see if additional memory allocation was needed.
3345 
3346    Example usage:
3347 
3348    Consider the following 8x8 matrix with 34 non-zero values, that is
3349    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3350    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3351    as follows:
3352 
3353 .vb
3354             1  2  0  |  0  3  0  |  0  4
3355     Proc0   0  5  6  |  7  0  0  |  8  0
3356             9  0 10  | 11  0  0  | 12  0
3357     -------------------------------------
3358            13  0 14  | 15 16 17  |  0  0
3359     Proc1   0 18  0  | 19 20 21  |  0  0
3360             0  0  0  | 22 23  0  | 24  0
3361     -------------------------------------
3362     Proc2  25 26 27  |  0  0 28  | 29  0
3363            30  0  0  | 31 32 33  |  0 34
3364 .ve
3365 
3366    This can be represented as a collection of submatrices as:
3367 
3368 .vb
3369       A B C
3370       D E F
3371       G H I
3372 .ve
3373 
3374    Where the submatrices A,B,C are owned by proc0, D,E,F are
3375    owned by proc1, G,H,I are owned by proc2.
3376 
3377    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3378    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3379    The 'M','N' parameters are 8,8, and have the same values on all procs.
3380 
3381    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3382    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3383    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3384    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3385    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3386    matrix, ans [DF] as another SeqAIJ matrix.
3387 
3388    When d_nz, o_nz parameters are specified, d_nz storage elements are
3389    allocated for every row of the local diagonal submatrix, and o_nz
3390    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3391    One way to choose d_nz and o_nz is to use the max nonzerors per local
3392    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3393    In this case, the values of d_nz,o_nz are:
3394 .vb
3395      proc0 : dnz = 2, o_nz = 2
3396      proc1 : dnz = 3, o_nz = 2
3397      proc2 : dnz = 1, o_nz = 4
3398 .ve
3399    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3400    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3401    for proc3. i.e we are using 12+15+10=37 storage locations to store
3402    34 values.
3403 
3404    When d_nnz, o_nnz parameters are specified, the storage is specified
3405    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3406    In the above case the values for d_nnz,o_nnz are:
3407 .vb
3408      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3409      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3410      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3411 .ve
3412    Here the space allocated is sum of all the above values i.e 34, and
3413    hence pre-allocation is perfect.
3414 
3415    Level: intermediate
3416 
3417 .keywords: matrix, aij, compressed row, sparse, parallel
3418 
3419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3420           MPIAIJ, MatGetInfo()
3421 @*/
3422 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3423 {
3424   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3425 
3426   PetscFunctionBegin;
3427   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3428   if (f) {
3429     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3430   }
3431   PetscFunctionReturn(0);
3432 }
3433 
3434 #undef __FUNCT__
3435 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3436 /*@
3437      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3438          CSR format the local rows.
3439 
3440    Collective on MPI_Comm
3441 
3442    Input Parameters:
3443 +  comm - MPI communicator
3444 .  m - number of local rows (Cannot be PETSC_DECIDE)
3445 .  n - This value should be the same as the local size used in creating the
3446        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3447        calculated if N is given) For square matrices n is almost always m.
3448 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3449 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3450 .   i - row indices
3451 .   j - column indices
3452 -   a - matrix values
3453 
3454    Output Parameter:
3455 .   mat - the matrix
3456 
3457    Level: intermediate
3458 
3459    Notes:
3460        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3461      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3462      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3463 
3464        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3465 
3466        The format which is used for the sparse matrix input, is equivalent to a
3467     row-major ordering.. i.e for the following matrix, the input data expected is
3468     as shown:
3469 
3470         1 0 0
3471         2 0 3     P0
3472        -------
3473         4 5 6     P1
3474 
3475      Process0 [P0]: rows_owned=[0,1]
3476         i =  {0,1,3}  [size = nrow+1  = 2+1]
3477         j =  {0,0,2}  [size = nz = 6]
3478         v =  {1,2,3}  [size = nz = 6]
3479 
3480      Process1 [P1]: rows_owned=[2]
3481         i =  {0,3}    [size = nrow+1  = 1+1]
3482         j =  {0,1,2}  [size = nz = 6]
3483         v =  {4,5,6}  [size = nz = 6]
3484 
3485 .keywords: matrix, aij, compressed row, sparse, parallel
3486 
3487 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3488           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3489 @*/
3490 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3491 {
3492   PetscErrorCode ierr;
3493 
3494  PetscFunctionBegin;
3495   if (i[0]) {
3496     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3497   }
3498   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3499   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3500   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3501   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3502   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3503   PetscFunctionReturn(0);
3504 }
3505 
3506 #undef __FUNCT__
3507 #define __FUNCT__ "MatCreateMPIAIJ"
3508 /*@C
3509    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3510    (the default parallel PETSc format).  For good matrix assembly performance
3511    the user should preallocate the matrix storage by setting the parameters
3512    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3513    performance can be increased by more than a factor of 50.
3514 
3515    Collective on MPI_Comm
3516 
3517    Input Parameters:
3518 +  comm - MPI communicator
3519 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3520            This value should be the same as the local size used in creating the
3521            y vector for the matrix-vector product y = Ax.
3522 .  n - This value should be the same as the local size used in creating the
3523        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3524        calculated if N is given) For square matrices n is almost always m.
3525 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3526 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3527 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3528            (same value is used for all local rows)
3529 .  d_nnz - array containing the number of nonzeros in the various rows of the
3530            DIAGONAL portion of the local submatrix (possibly different for each row)
3531            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3532            The size of this array is equal to the number of local rows, i.e 'm'.
3533            You must leave room for the diagonal entry even if it is zero.
3534 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3535            submatrix (same value is used for all local rows).
3536 -  o_nnz - array containing the number of nonzeros in the various rows of the
3537            OFF-DIAGONAL portion of the local submatrix (possibly different for
3538            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3539            structure. The size of this array is equal to the number
3540            of local rows, i.e 'm'.
3541 
3542    Output Parameter:
3543 .  A - the matrix
3544 
3545    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3546    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3547    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3548 
3549    Notes:
3550    If the *_nnz parameter is given then the *_nz parameter is ignored
3551 
3552    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3553    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3554    storage requirements for this matrix.
3555 
3556    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3557    processor than it must be used on all processors that share the object for
3558    that argument.
3559 
3560    The user MUST specify either the local or global matrix dimensions
3561    (possibly both).
3562 
3563    The parallel matrix is partitioned across processors such that the
3564    first m0 rows belong to process 0, the next m1 rows belong to
3565    process 1, the next m2 rows belong to process 2 etc.. where
3566    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3567    values corresponding to [m x N] submatrix.
3568 
3569    The columns are logically partitioned with the n0 columns belonging
3570    to 0th partition, the next n1 columns belonging to the next
3571    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
3572 
3573    The DIAGONAL portion of the local submatrix on any given processor
3574    is the submatrix corresponding to the rows and columns m,n
3575    corresponding to the given processor. i.e diagonal matrix on
3576    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3577    etc. The remaining portion of the local submatrix [m x (N-n)]
3578    constitute the OFF-DIAGONAL portion. The example below better
3579    illustrates this concept.
3580 
3581    For a square global matrix we define each processor's diagonal portion
3582    to be its local rows and the corresponding columns (a square submatrix);
3583    each processor's off-diagonal portion encompasses the remainder of the
3584    local matrix (a rectangular submatrix).
3585 
3586    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3587 
3588    When calling this routine with a single process communicator, a matrix of
3589    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3590    type of communicator, use the construction mechanism:
3591      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3592 
3593    By default, this format uses inodes (identical nodes) when possible.
3594    We search for consecutive rows with the same nonzero structure, thereby
3595    reusing matrix information to achieve increased efficiency.
3596 
3597    Options Database Keys:
3598 +  -mat_no_inode  - Do not use inodes
3599 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3600 -  -mat_aij_oneindex - Internally use indexing starting at 1
3601         rather than 0.  Note that when calling MatSetValues(),
3602         the user still MUST index entries starting at 0!
3603 
3604 
3605    Example usage:
3606 
3607    Consider the following 8x8 matrix with 34 non-zero values, that is
3608    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3609    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3610    as follows:
3611 
3612 .vb
3613             1  2  0  |  0  3  0  |  0  4
3614     Proc0   0  5  6  |  7  0  0  |  8  0
3615             9  0 10  | 11  0  0  | 12  0
3616     -------------------------------------
3617            13  0 14  | 15 16 17  |  0  0
3618     Proc1   0 18  0  | 19 20 21  |  0  0
3619             0  0  0  | 22 23  0  | 24  0
3620     -------------------------------------
3621     Proc2  25 26 27  |  0  0 28  | 29  0
3622            30  0  0  | 31 32 33  |  0 34
3623 .ve
3624 
3625    This can be represented as a collection of submatrices as:
3626 
3627 .vb
3628       A B C
3629       D E F
3630       G H I
3631 .ve
3632 
3633    Where the submatrices A,B,C are owned by proc0, D,E,F are
3634    owned by proc1, G,H,I are owned by proc2.
3635 
3636    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3637    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3638    The 'M','N' parameters are 8,8, and have the same values on all procs.
3639 
3640    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3641    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3642    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3643    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3644    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3645    matrix, ans [DF] as another SeqAIJ matrix.
3646 
3647    When d_nz, o_nz parameters are specified, d_nz storage elements are
3648    allocated for every row of the local diagonal submatrix, and o_nz
3649    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3650    One way to choose d_nz and o_nz is to use the max nonzerors per local
3651    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3652    In this case, the values of d_nz,o_nz are:
3653 .vb
3654      proc0 : dnz = 2, o_nz = 2
3655      proc1 : dnz = 3, o_nz = 2
3656      proc2 : dnz = 1, o_nz = 4
3657 .ve
3658    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3659    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3660    for proc3. i.e we are using 12+15+10=37 storage locations to store
3661    34 values.
3662 
3663    When d_nnz, o_nnz parameters are specified, the storage is specified
3664    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3665    In the above case the values for d_nnz,o_nnz are:
3666 .vb
3667      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3668      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3669      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3670 .ve
3671    Here the space allocated is sum of all the above values i.e 34, and
3672    hence pre-allocation is perfect.
3673 
3674    Level: intermediate
3675 
3676 .keywords: matrix, aij, compressed row, sparse, parallel
3677 
3678 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3679           MPIAIJ, MatCreateMPIAIJWithArrays()
3680 @*/
3681 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3682 {
3683   PetscErrorCode ierr;
3684   PetscMPIInt    size;
3685 
3686   PetscFunctionBegin;
3687   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3688   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3689   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3690   if (size > 1) {
3691     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3692     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3693   } else {
3694     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3695     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3696   }
3697   PetscFunctionReturn(0);
3698 }
3699 
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3702 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3703 {
3704   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3705 
3706   PetscFunctionBegin;
3707   *Ad     = a->A;
3708   *Ao     = a->B;
3709   *colmap = a->garray;
3710   PetscFunctionReturn(0);
3711 }
3712 
3713 #undef __FUNCT__
3714 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3715 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3716 {
3717   PetscErrorCode ierr;
3718   PetscInt       i;
3719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3720 
3721   PetscFunctionBegin;
3722   if (coloring->ctype == IS_COLORING_GLOBAL) {
3723     ISColoringValue *allcolors,*colors;
3724     ISColoring      ocoloring;
3725 
3726     /* set coloring for diagonal portion */
3727     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3728 
3729     /* set coloring for off-diagonal portion */
3730     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3731     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3732     for (i=0; i<a->B->cmap->n; i++) {
3733       colors[i] = allcolors[a->garray[i]];
3734     }
3735     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3736     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3737     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3738     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3739   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3740     ISColoringValue *colors;
3741     PetscInt        *larray;
3742     ISColoring      ocoloring;
3743 
3744     /* set coloring for diagonal portion */
3745     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3746     for (i=0; i<a->A->cmap->n; i++) {
3747       larray[i] = i + A->cmap->rstart;
3748     }
3749     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3750     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3751     for (i=0; i<a->A->cmap->n; i++) {
3752       colors[i] = coloring->colors[larray[i]];
3753     }
3754     ierr = PetscFree(larray);CHKERRQ(ierr);
3755     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3756     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3757     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3758 
3759     /* set coloring for off-diagonal portion */
3760     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3761     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3762     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3763     for (i=0; i<a->B->cmap->n; i++) {
3764       colors[i] = coloring->colors[larray[i]];
3765     }
3766     ierr = PetscFree(larray);CHKERRQ(ierr);
3767     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3768     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3769     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3770   } else {
3771     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3772   }
3773 
3774   PetscFunctionReturn(0);
3775 }
3776 
3777 #if defined(PETSC_HAVE_ADIC)
3778 #undef __FUNCT__
3779 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3780 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3781 {
3782   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3783   PetscErrorCode ierr;
3784 
3785   PetscFunctionBegin;
3786   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3787   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3788   PetscFunctionReturn(0);
3789 }
3790 #endif
3791 
3792 #undef __FUNCT__
3793 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3794 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3795 {
3796   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3797   PetscErrorCode ierr;
3798 
3799   PetscFunctionBegin;
3800   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3801   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3802   PetscFunctionReturn(0);
3803 }
3804 
3805 #undef __FUNCT__
3806 #define __FUNCT__ "MatMerge"
3807 /*@
3808       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3809                  matrices from each processor
3810 
3811     Collective on MPI_Comm
3812 
3813    Input Parameters:
3814 +    comm - the communicators the parallel matrix will live on
3815 .    inmat - the input sequential matrices
3816 .    n - number of local columns (or PETSC_DECIDE)
3817 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3818 
3819    Output Parameter:
3820 .    outmat - the parallel matrix generated
3821 
3822     Level: advanced
3823 
3824    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3825 
3826 @*/
3827 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3828 {
3829   PetscErrorCode ierr;
3830   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3831   PetscInt       *indx;
3832   PetscScalar    *values;
3833 
3834   PetscFunctionBegin;
3835   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3836   if (scall == MAT_INITIAL_MATRIX){
3837     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3838     if (n == PETSC_DECIDE){
3839       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3840     }
3841     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3842     rstart -= m;
3843 
3844     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3845     for (i=0;i<m;i++) {
3846       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3847       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3848       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3849     }
3850     /* This routine will ONLY return MPIAIJ type matrix */
3851     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3852     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3853     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3854     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3855     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3856 
3857   } else if (scall == MAT_REUSE_MATRIX){
3858     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3859   } else {
3860     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3861   }
3862 
3863   for (i=0;i<m;i++) {
3864     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3865     Ii    = i + rstart;
3866     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3867     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3868   }
3869   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3870   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3871   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3872 
3873   PetscFunctionReturn(0);
3874 }
3875 
3876 #undef __FUNCT__
3877 #define __FUNCT__ "MatFileSplit"
3878 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3879 {
3880   PetscErrorCode    ierr;
3881   PetscMPIInt       rank;
3882   PetscInt          m,N,i,rstart,nnz;
3883   size_t            len;
3884   const PetscInt    *indx;
3885   PetscViewer       out;
3886   char              *name;
3887   Mat               B;
3888   const PetscScalar *values;
3889 
3890   PetscFunctionBegin;
3891   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3892   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3893   /* Should this be the type of the diagonal block of A? */
3894   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3895   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3896   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3897   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3898   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3899   for (i=0;i<m;i++) {
3900     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3901     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3902     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3903   }
3904   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3905   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3906 
3907   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3908   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3909   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3910   sprintf(name,"%s.%d",outfile,rank);
3911   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3912   ierr = PetscFree(name);
3913   ierr = MatView(B,out);CHKERRQ(ierr);
3914   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3915   ierr = MatDestroy(B);CHKERRQ(ierr);
3916   PetscFunctionReturn(0);
3917 }
3918 
3919 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3920 #undef __FUNCT__
3921 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3922 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3923 {
3924   PetscErrorCode       ierr;
3925   Mat_Merge_SeqsToMPI  *merge;
3926   PetscContainer       container;
3927 
3928   PetscFunctionBegin;
3929   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3930   if (container) {
3931     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3932     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3933     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3934     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3935     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3936     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3937     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3938     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3939     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3940     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3941     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3942     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3943 
3944     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3945     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3946   }
3947   ierr = PetscFree(merge);CHKERRQ(ierr);
3948 
3949   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3950   PetscFunctionReturn(0);
3951 }
3952 
3953 #include "../src/mat/utils/freespace.h"
3954 #include "petscbt.h"
3955 
3956 #undef __FUNCT__
3957 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3958 /*@C
3959       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3960                  matrices from each processor
3961 
3962     Collective on MPI_Comm
3963 
3964    Input Parameters:
3965 +    comm - the communicators the parallel matrix will live on
3966 .    seqmat - the input sequential matrices
3967 .    m - number of local rows (or PETSC_DECIDE)
3968 .    n - number of local columns (or PETSC_DECIDE)
3969 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3970 
3971    Output Parameter:
3972 .    mpimat - the parallel matrix generated
3973 
3974     Level: advanced
3975 
3976    Notes:
3977      The dimensions of the sequential matrix in each processor MUST be the same.
3978      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3979      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3980 @*/
3981 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3982 {
3983   PetscErrorCode       ierr;
3984   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3985   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3986   PetscMPIInt          size,rank,taga,*len_s;
3987   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3988   PetscInt             proc,m;
3989   PetscInt             **buf_ri,**buf_rj;
3990   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3991   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3992   MPI_Request          *s_waits,*r_waits;
3993   MPI_Status           *status;
3994   MatScalar            *aa=a->a;
3995   MatScalar            **abuf_r,*ba_i;
3996   Mat_Merge_SeqsToMPI  *merge;
3997   PetscContainer       container;
3998 
3999   PetscFunctionBegin;
4000   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4001 
4002   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4003   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4004 
4005   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4006   if (container) {
4007     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4008   }
4009   bi     = merge->bi;
4010   bj     = merge->bj;
4011   buf_ri = merge->buf_ri;
4012   buf_rj = merge->buf_rj;
4013 
4014   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4015   owners = merge->rowmap.range;
4016   len_s  = merge->len_s;
4017 
4018   /* send and recv matrix values */
4019   /*-----------------------------*/
4020   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4021   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4022 
4023   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4024   for (proc=0,k=0; proc<size; proc++){
4025     if (!len_s[proc]) continue;
4026     i = owners[proc];
4027     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4028     k++;
4029   }
4030 
4031   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4032   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4033   ierr = PetscFree(status);CHKERRQ(ierr);
4034 
4035   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4036   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4037 
4038   /* insert mat values of mpimat */
4039   /*----------------------------*/
4040   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4041   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4042   nextrow = buf_ri_k + merge->nrecv;
4043   nextai  = nextrow + merge->nrecv;
4044 
4045   for (k=0; k<merge->nrecv; k++){
4046     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4047     nrows = *(buf_ri_k[k]);
4048     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4049     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4050   }
4051 
4052   /* set values of ba */
4053   m = merge->rowmap.n;
4054   for (i=0; i<m; i++) {
4055     arow = owners[rank] + i;
4056     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4057     bnzi = bi[i+1] - bi[i];
4058     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4059 
4060     /* add local non-zero vals of this proc's seqmat into ba */
4061     anzi = ai[arow+1] - ai[arow];
4062     aj   = a->j + ai[arow];
4063     aa   = a->a + ai[arow];
4064     nextaj = 0;
4065     for (j=0; nextaj<anzi; j++){
4066       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4067         ba_i[j] += aa[nextaj++];
4068       }
4069     }
4070 
4071     /* add received vals into ba */
4072     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4073       /* i-th row */
4074       if (i == *nextrow[k]) {
4075         anzi = *(nextai[k]+1) - *nextai[k];
4076         aj   = buf_rj[k] + *(nextai[k]);
4077         aa   = abuf_r[k] + *(nextai[k]);
4078         nextaj = 0;
4079         for (j=0; nextaj<anzi; j++){
4080           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4081             ba_i[j] += aa[nextaj++];
4082           }
4083         }
4084         nextrow[k]++; nextai[k]++;
4085       }
4086     }
4087     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4088   }
4089   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4090   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4091 
4092   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4093   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4094   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4095   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4096   PetscFunctionReturn(0);
4097 }
4098 
4099 #undef __FUNCT__
4100 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4101 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4102 {
4103   PetscErrorCode       ierr;
4104   Mat                  B_mpi;
4105   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4106   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4107   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4108   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4109   PetscInt             len,proc,*dnz,*onz;
4110   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4111   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4112   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4113   MPI_Status           *status;
4114   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4115   PetscBT              lnkbt;
4116   Mat_Merge_SeqsToMPI  *merge;
4117   PetscContainer       container;
4118 
4119   PetscFunctionBegin;
4120   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4121 
4122   /* make sure it is a PETSc comm */
4123   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4124   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4125   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4126 
4127   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4128   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4129 
4130   /* determine row ownership */
4131   /*---------------------------------------------------------*/
4132   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4133   merge->rowmap.n = m;
4134   merge->rowmap.N = M;
4135   merge->rowmap.bs = 1;
4136   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4137   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4138   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4139 
4140   m      = merge->rowmap.n;
4141   M      = merge->rowmap.N;
4142   owners = merge->rowmap.range;
4143 
4144   /* determine the number of messages to send, their lengths */
4145   /*---------------------------------------------------------*/
4146   len_s  = merge->len_s;
4147 
4148   len = 0;  /* length of buf_si[] */
4149   merge->nsend = 0;
4150   for (proc=0; proc<size; proc++){
4151     len_si[proc] = 0;
4152     if (proc == rank){
4153       len_s[proc] = 0;
4154     } else {
4155       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4156       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4157     }
4158     if (len_s[proc]) {
4159       merge->nsend++;
4160       nrows = 0;
4161       for (i=owners[proc]; i<owners[proc+1]; i++){
4162         if (ai[i+1] > ai[i]) nrows++;
4163       }
4164       len_si[proc] = 2*(nrows+1);
4165       len += len_si[proc];
4166     }
4167   }
4168 
4169   /* determine the number and length of messages to receive for ij-structure */
4170   /*-------------------------------------------------------------------------*/
4171   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4172   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4173 
4174   /* post the Irecv of j-structure */
4175   /*-------------------------------*/
4176   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4177   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4178 
4179   /* post the Isend of j-structure */
4180   /*--------------------------------*/
4181   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4182   sj_waits = si_waits + merge->nsend;
4183 
4184   for (proc=0, k=0; proc<size; proc++){
4185     if (!len_s[proc]) continue;
4186     i = owners[proc];
4187     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4188     k++;
4189   }
4190 
4191   /* receives and sends of j-structure are complete */
4192   /*------------------------------------------------*/
4193   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4194   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4195 
4196   /* send and recv i-structure */
4197   /*---------------------------*/
4198   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4199   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4200 
4201   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4202   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4203   for (proc=0,k=0; proc<size; proc++){
4204     if (!len_s[proc]) continue;
4205     /* form outgoing message for i-structure:
4206          buf_si[0]:                 nrows to be sent
4207                [1:nrows]:           row index (global)
4208                [nrows+1:2*nrows+1]: i-structure index
4209     */
4210     /*-------------------------------------------*/
4211     nrows = len_si[proc]/2 - 1;
4212     buf_si_i    = buf_si + nrows+1;
4213     buf_si[0]   = nrows;
4214     buf_si_i[0] = 0;
4215     nrows = 0;
4216     for (i=owners[proc]; i<owners[proc+1]; i++){
4217       anzi = ai[i+1] - ai[i];
4218       if (anzi) {
4219         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4220         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4221         nrows++;
4222       }
4223     }
4224     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4225     k++;
4226     buf_si += len_si[proc];
4227   }
4228 
4229   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4230   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4231 
4232   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4233   for (i=0; i<merge->nrecv; i++){
4234     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4235   }
4236 
4237   ierr = PetscFree(len_si);CHKERRQ(ierr);
4238   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4239   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4240   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4241   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4242   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4243   ierr = PetscFree(status);CHKERRQ(ierr);
4244 
4245   /* compute a local seq matrix in each processor */
4246   /*----------------------------------------------*/
4247   /* allocate bi array and free space for accumulating nonzero column info */
4248   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4249   bi[0] = 0;
4250 
4251   /* create and initialize a linked list */
4252   nlnk = N+1;
4253   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4254 
4255   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4256   len = 0;
4257   len  = ai[owners[rank+1]] - ai[owners[rank]];
4258   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4259   current_space = free_space;
4260 
4261   /* determine symbolic info for each local row */
4262   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4263   nextrow = buf_ri_k + merge->nrecv;
4264   nextai  = nextrow + merge->nrecv;
4265   for (k=0; k<merge->nrecv; k++){
4266     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4267     nrows = *buf_ri_k[k];
4268     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4269     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4270   }
4271 
4272   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4273   len = 0;
4274   for (i=0;i<m;i++) {
4275     bnzi   = 0;
4276     /* add local non-zero cols of this proc's seqmat into lnk */
4277     arow   = owners[rank] + i;
4278     anzi   = ai[arow+1] - ai[arow];
4279     aj     = a->j + ai[arow];
4280     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4281     bnzi += nlnk;
4282     /* add received col data into lnk */
4283     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4284       if (i == *nextrow[k]) { /* i-th row */
4285         anzi = *(nextai[k]+1) - *nextai[k];
4286         aj   = buf_rj[k] + *nextai[k];
4287         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4288         bnzi += nlnk;
4289         nextrow[k]++; nextai[k]++;
4290       }
4291     }
4292     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4293 
4294     /* if free space is not available, make more free space */
4295     if (current_space->local_remaining<bnzi) {
4296       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4297       nspacedouble++;
4298     }
4299     /* copy data into free space, then initialize lnk */
4300     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4301     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4302 
4303     current_space->array           += bnzi;
4304     current_space->local_used      += bnzi;
4305     current_space->local_remaining -= bnzi;
4306 
4307     bi[i+1] = bi[i] + bnzi;
4308   }
4309 
4310   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4311 
4312   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4313   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4314   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4315 
4316   /* create symbolic parallel matrix B_mpi */
4317   /*---------------------------------------*/
4318   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4319   if (n==PETSC_DECIDE) {
4320     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4321   } else {
4322     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4323   }
4324   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4325   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4326   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4327 
4328   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4329   B_mpi->assembled     = PETSC_FALSE;
4330   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4331   merge->bi            = bi;
4332   merge->bj            = bj;
4333   merge->buf_ri        = buf_ri;
4334   merge->buf_rj        = buf_rj;
4335   merge->coi           = PETSC_NULL;
4336   merge->coj           = PETSC_NULL;
4337   merge->owners_co     = PETSC_NULL;
4338 
4339   /* attach the supporting struct to B_mpi for reuse */
4340   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4341   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4342   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4343   *mpimat = B_mpi;
4344 
4345   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4346   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4347   PetscFunctionReturn(0);
4348 }
4349 
4350 #undef __FUNCT__
4351 #define __FUNCT__ "MatMerge_SeqsToMPI"
4352 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4353 {
4354   PetscErrorCode   ierr;
4355 
4356   PetscFunctionBegin;
4357   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4358   if (scall == MAT_INITIAL_MATRIX){
4359     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4360   }
4361   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4362   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4363   PetscFunctionReturn(0);
4364 }
4365 
4366 #undef __FUNCT__
4367 #define __FUNCT__ "MatGetLocalMat"
4368 /*@
4369      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4370 
4371     Not Collective
4372 
4373    Input Parameters:
4374 +    A - the matrix
4375 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4376 
4377    Output Parameter:
4378 .    A_loc - the local sequential matrix generated
4379 
4380     Level: developer
4381 
4382 @*/
4383 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4384 {
4385   PetscErrorCode  ierr;
4386   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4387   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4388   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4389   MatScalar       *aa=a->a,*ba=b->a,*cam;
4390   PetscScalar     *ca;
4391   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4392   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4393 
4394   PetscFunctionBegin;
4395   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4396   if (scall == MAT_INITIAL_MATRIX){
4397     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4398     ci[0] = 0;
4399     for (i=0; i<am; i++){
4400       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4401     }
4402     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4403     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4404     k = 0;
4405     for (i=0; i<am; i++) {
4406       ncols_o = bi[i+1] - bi[i];
4407       ncols_d = ai[i+1] - ai[i];
4408       /* off-diagonal portion of A */
4409       for (jo=0; jo<ncols_o; jo++) {
4410         col = cmap[*bj];
4411         if (col >= cstart) break;
4412         cj[k]   = col; bj++;
4413         ca[k++] = *ba++;
4414       }
4415       /* diagonal portion of A */
4416       for (j=0; j<ncols_d; j++) {
4417         cj[k]   = cstart + *aj++;
4418         ca[k++] = *aa++;
4419       }
4420       /* off-diagonal portion of A */
4421       for (j=jo; j<ncols_o; j++) {
4422         cj[k]   = cmap[*bj++];
4423         ca[k++] = *ba++;
4424       }
4425     }
4426     /* put together the new matrix */
4427     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4428     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4429     /* Since these are PETSc arrays, change flags to free them as necessary. */
4430     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4431     mat->free_a  = PETSC_TRUE;
4432     mat->free_ij = PETSC_TRUE;
4433     mat->nonew   = 0;
4434   } else if (scall == MAT_REUSE_MATRIX){
4435     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4436     ci = mat->i; cj = mat->j; cam = mat->a;
4437     for (i=0; i<am; i++) {
4438       /* off-diagonal portion of A */
4439       ncols_o = bi[i+1] - bi[i];
4440       for (jo=0; jo<ncols_o; jo++) {
4441         col = cmap[*bj];
4442         if (col >= cstart) break;
4443         *cam++ = *ba++; bj++;
4444       }
4445       /* diagonal portion of A */
4446       ncols_d = ai[i+1] - ai[i];
4447       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4448       /* off-diagonal portion of A */
4449       for (j=jo; j<ncols_o; j++) {
4450         *cam++ = *ba++; bj++;
4451       }
4452     }
4453   } else {
4454     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4455   }
4456 
4457   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4458   PetscFunctionReturn(0);
4459 }
4460 
4461 #undef __FUNCT__
4462 #define __FUNCT__ "MatGetLocalMatCondensed"
4463 /*@C
4464      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4465 
4466     Not Collective
4467 
4468    Input Parameters:
4469 +    A - the matrix
4470 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4471 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4472 
4473    Output Parameter:
4474 .    A_loc - the local sequential matrix generated
4475 
4476     Level: developer
4477 
4478 @*/
4479 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4480 {
4481   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4482   PetscErrorCode    ierr;
4483   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4484   IS                isrowa,iscola;
4485   Mat               *aloc;
4486 
4487   PetscFunctionBegin;
4488   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4489   if (!row){
4490     start = A->rmap->rstart; end = A->rmap->rend;
4491     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4492   } else {
4493     isrowa = *row;
4494   }
4495   if (!col){
4496     start = A->cmap->rstart;
4497     cmap  = a->garray;
4498     nzA   = a->A->cmap->n;
4499     nzB   = a->B->cmap->n;
4500     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4501     ncols = 0;
4502     for (i=0; i<nzB; i++) {
4503       if (cmap[i] < start) idx[ncols++] = cmap[i];
4504       else break;
4505     }
4506     imark = i;
4507     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4508     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4509     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4510     ierr = PetscFree(idx);CHKERRQ(ierr);
4511   } else {
4512     iscola = *col;
4513   }
4514   if (scall != MAT_INITIAL_MATRIX){
4515     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4516     aloc[0] = *A_loc;
4517   }
4518   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4519   *A_loc = aloc[0];
4520   ierr = PetscFree(aloc);CHKERRQ(ierr);
4521   if (!row){
4522     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4523   }
4524   if (!col){
4525     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4526   }
4527   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4528   PetscFunctionReturn(0);
4529 }
4530 
4531 #undef __FUNCT__
4532 #define __FUNCT__ "MatGetBrowsOfAcols"
4533 /*@C
4534     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4535 
4536     Collective on Mat
4537 
4538    Input Parameters:
4539 +    A,B - the matrices in mpiaij format
4540 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4541 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4542 
4543    Output Parameter:
4544 +    rowb, colb - index sets of rows and columns of B to extract
4545 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4546 -    B_seq - the sequential matrix generated
4547 
4548     Level: developer
4549 
4550 @*/
4551 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4552 {
4553   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4554   PetscErrorCode    ierr;
4555   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4556   IS                isrowb,iscolb;
4557   Mat               *bseq;
4558 
4559   PetscFunctionBegin;
4560   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4561     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4562   }
4563   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4564 
4565   if (scall == MAT_INITIAL_MATRIX){
4566     start = A->cmap->rstart;
4567     cmap  = a->garray;
4568     nzA   = a->A->cmap->n;
4569     nzB   = a->B->cmap->n;
4570     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4571     ncols = 0;
4572     for (i=0; i<nzB; i++) {  /* row < local row index */
4573       if (cmap[i] < start) idx[ncols++] = cmap[i];
4574       else break;
4575     }
4576     imark = i;
4577     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4578     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4579     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4580     ierr = PetscFree(idx);CHKERRQ(ierr);
4581     *brstart = imark;
4582     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4583   } else {
4584     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4585     isrowb = *rowb; iscolb = *colb;
4586     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4587     bseq[0] = *B_seq;
4588   }
4589   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4590   *B_seq = bseq[0];
4591   ierr = PetscFree(bseq);CHKERRQ(ierr);
4592   if (!rowb){
4593     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4594   } else {
4595     *rowb = isrowb;
4596   }
4597   if (!colb){
4598     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4599   } else {
4600     *colb = iscolb;
4601   }
4602   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4603   PetscFunctionReturn(0);
4604 }
4605 
4606 #undef __FUNCT__
4607 #define __FUNCT__ "MatGetBrowsOfAoCols"
4608 /*@C
4609     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4610     of the OFF-DIAGONAL portion of local A
4611 
4612     Collective on Mat
4613 
4614    Input Parameters:
4615 +    A,B - the matrices in mpiaij format
4616 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4617 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4618 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4619 
4620    Output Parameter:
4621 +    B_oth - the sequential matrix generated
4622 
4623     Level: developer
4624 
4625 @*/
4626 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4627 {
4628   VecScatter_MPI_General *gen_to,*gen_from;
4629   PetscErrorCode         ierr;
4630   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4631   Mat_SeqAIJ             *b_oth;
4632   VecScatter             ctx=a->Mvctx;
4633   MPI_Comm               comm=((PetscObject)ctx)->comm;
4634   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4635   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4636   PetscScalar            *rvalues,*svalues;
4637   MatScalar              *b_otha,*bufa,*bufA;
4638   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4639   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4640   MPI_Status             *sstatus,rstatus;
4641   PetscMPIInt            jj;
4642   PetscInt               *cols,sbs,rbs;
4643   PetscScalar            *vals;
4644 
4645   PetscFunctionBegin;
4646   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4647     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4648   }
4649   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4650   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4651 
4652   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4653   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4654   rvalues  = gen_from->values; /* holds the length of receiving row */
4655   svalues  = gen_to->values;   /* holds the length of sending row */
4656   nrecvs   = gen_from->n;
4657   nsends   = gen_to->n;
4658 
4659   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4660   srow     = gen_to->indices;   /* local row index to be sent */
4661   sstarts  = gen_to->starts;
4662   sprocs   = gen_to->procs;
4663   sstatus  = gen_to->sstatus;
4664   sbs      = gen_to->bs;
4665   rstarts  = gen_from->starts;
4666   rprocs   = gen_from->procs;
4667   rbs      = gen_from->bs;
4668 
4669   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4670   if (scall == MAT_INITIAL_MATRIX){
4671     /* i-array */
4672     /*---------*/
4673     /*  post receives */
4674     for (i=0; i<nrecvs; i++){
4675       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4676       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4677       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4678     }
4679 
4680     /* pack the outgoing message */
4681     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4682     rstartsj = sstartsj + nsends +1;
4683     sstartsj[0] = 0;  rstartsj[0] = 0;
4684     len = 0; /* total length of j or a array to be sent */
4685     k = 0;
4686     for (i=0; i<nsends; i++){
4687       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4688       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4689       for (j=0; j<nrows; j++) {
4690         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4691         for (l=0; l<sbs; l++){
4692           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4693           rowlen[j*sbs+l] = ncols;
4694           len += ncols;
4695           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4696         }
4697         k++;
4698       }
4699       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4700       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4701     }
4702     /* recvs and sends of i-array are completed */
4703     i = nrecvs;
4704     while (i--) {
4705       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4706     }
4707     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4708 
4709     /* allocate buffers for sending j and a arrays */
4710     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4711     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4712 
4713     /* create i-array of B_oth */
4714     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4715     b_othi[0] = 0;
4716     len = 0; /* total length of j or a array to be received */
4717     k = 0;
4718     for (i=0; i<nrecvs; i++){
4719       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4720       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4721       for (j=0; j<nrows; j++) {
4722         b_othi[k+1] = b_othi[k] + rowlen[j];
4723         len += rowlen[j]; k++;
4724       }
4725       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4726     }
4727 
4728     /* allocate space for j and a arrrays of B_oth */
4729     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4730     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4731 
4732     /* j-array */
4733     /*---------*/
4734     /*  post receives of j-array */
4735     for (i=0; i<nrecvs; i++){
4736       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4737       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4738     }
4739 
4740     /* pack the outgoing message j-array */
4741     k = 0;
4742     for (i=0; i<nsends; i++){
4743       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4744       bufJ = bufj+sstartsj[i];
4745       for (j=0; j<nrows; j++) {
4746         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4747         for (ll=0; ll<sbs; ll++){
4748           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4749           for (l=0; l<ncols; l++){
4750             *bufJ++ = cols[l];
4751           }
4752           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4753         }
4754       }
4755       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4756     }
4757 
4758     /* recvs and sends of j-array are completed */
4759     i = nrecvs;
4760     while (i--) {
4761       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4762     }
4763     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4764   } else if (scall == MAT_REUSE_MATRIX){
4765     sstartsj = *startsj;
4766     rstartsj = sstartsj + nsends +1;
4767     bufa     = *bufa_ptr;
4768     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4769     b_otha   = b_oth->a;
4770   } else {
4771     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4772   }
4773 
4774   /* a-array */
4775   /*---------*/
4776   /*  post receives of a-array */
4777   for (i=0; i<nrecvs; i++){
4778     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4779     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4780   }
4781 
4782   /* pack the outgoing message a-array */
4783   k = 0;
4784   for (i=0; i<nsends; i++){
4785     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4786     bufA = bufa+sstartsj[i];
4787     for (j=0; j<nrows; j++) {
4788       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4789       for (ll=0; ll<sbs; ll++){
4790         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4791         for (l=0; l<ncols; l++){
4792           *bufA++ = vals[l];
4793         }
4794         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4795       }
4796     }
4797     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4798   }
4799   /* recvs and sends of a-array are completed */
4800   i = nrecvs;
4801   while (i--) {
4802     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4803   }
4804   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4805   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4806 
4807   if (scall == MAT_INITIAL_MATRIX){
4808     /* put together the new matrix */
4809     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4810 
4811     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4812     /* Since these are PETSc arrays, change flags to free them as necessary. */
4813     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4814     b_oth->free_a  = PETSC_TRUE;
4815     b_oth->free_ij = PETSC_TRUE;
4816     b_oth->nonew   = 0;
4817 
4818     ierr = PetscFree(bufj);CHKERRQ(ierr);
4819     if (!startsj || !bufa_ptr){
4820       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4821       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4822     } else {
4823       *startsj  = sstartsj;
4824       *bufa_ptr = bufa;
4825     }
4826   }
4827   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4828   PetscFunctionReturn(0);
4829 }
4830 
4831 #undef __FUNCT__
4832 #define __FUNCT__ "MatGetCommunicationStructs"
4833 /*@C
4834   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4835 
4836   Not Collective
4837 
4838   Input Parameters:
4839 . A - The matrix in mpiaij format
4840 
4841   Output Parameter:
4842 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4843 . colmap - A map from global column index to local index into lvec
4844 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4845 
4846   Level: developer
4847 
4848 @*/
4849 #if defined (PETSC_USE_CTABLE)
4850 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4851 #else
4852 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4853 #endif
4854 {
4855   Mat_MPIAIJ *a;
4856 
4857   PetscFunctionBegin;
4858   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4859   PetscValidPointer(lvec, 2)
4860   PetscValidPointer(colmap, 3)
4861   PetscValidPointer(multScatter, 4)
4862   a = (Mat_MPIAIJ *) A->data;
4863   if (lvec) *lvec = a->lvec;
4864   if (colmap) *colmap = a->colmap;
4865   if (multScatter) *multScatter = a->Mvctx;
4866   PetscFunctionReturn(0);
4867 }
4868 
4869 EXTERN_C_BEGIN
4870 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4871 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4872 EXTERN_C_END
4873 
4874 #include "../src/mat/impls/dense/mpi/mpidense.h"
4875 
4876 #undef __FUNCT__
4877 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4878 /*
4879     Computes (B'*A')' since computing B*A directly is untenable
4880 
4881                n                       p                          p
4882         (              )       (              )         (                  )
4883       m (      A       )  *  n (       B      )   =   m (         C        )
4884         (              )       (              )         (                  )
4885 
4886 */
4887 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4888 {
4889   PetscErrorCode     ierr;
4890   Mat                At,Bt,Ct;
4891 
4892   PetscFunctionBegin;
4893   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4894   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4895   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4896   ierr = MatDestroy(At);CHKERRQ(ierr);
4897   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4898   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4899   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4900   PetscFunctionReturn(0);
4901 }
4902 
4903 #undef __FUNCT__
4904 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4905 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4906 {
4907   PetscErrorCode ierr;
4908   PetscInt       m=A->rmap->n,n=B->cmap->n;
4909   Mat            Cmat;
4910 
4911   PetscFunctionBegin;
4912   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4913   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4914   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4915   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4916   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4917   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4918   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4919   *C   = Cmat;
4920   PetscFunctionReturn(0);
4921 }
4922 
4923 /* ----------------------------------------------------------------*/
4924 #undef __FUNCT__
4925 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4926 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4927 {
4928   PetscErrorCode ierr;
4929 
4930   PetscFunctionBegin;
4931   if (scall == MAT_INITIAL_MATRIX){
4932     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4933   }
4934   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4935   PetscFunctionReturn(0);
4936 }
4937 
4938 EXTERN_C_BEGIN
4939 #if defined(PETSC_HAVE_MUMPS)
4940 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4941 #endif
4942 #if defined(PETSC_HAVE_PASTIX)
4943 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4944 #endif
4945 #if defined(PETSC_HAVE_SUPERLU_DIST)
4946 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4947 #endif
4948 #if defined(PETSC_HAVE_SPOOLES)
4949 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4950 #endif
4951 EXTERN_C_END
4952 
4953 /*MC
4954    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4955 
4956    Options Database Keys:
4957 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4958 
4959   Level: beginner
4960 
4961 .seealso: MatCreateMPIAIJ()
4962 M*/
4963 
4964 EXTERN_C_BEGIN
4965 #undef __FUNCT__
4966 #define __FUNCT__ "MatCreate_MPIAIJ"
4967 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4968 {
4969   Mat_MPIAIJ     *b;
4970   PetscErrorCode ierr;
4971   PetscMPIInt    size;
4972 
4973   PetscFunctionBegin;
4974   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4975 
4976   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4977   B->data         = (void*)b;
4978   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4979   B->rmap->bs      = 1;
4980   B->assembled    = PETSC_FALSE;
4981   B->mapping      = 0;
4982 
4983   B->insertmode      = NOT_SET_VALUES;
4984   b->size            = size;
4985   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4986 
4987   /* build cache for off array entries formed */
4988   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4989   b->donotstash  = PETSC_FALSE;
4990   b->colmap      = 0;
4991   b->garray      = 0;
4992   b->roworiented = PETSC_TRUE;
4993 
4994   /* stuff used for matrix vector multiply */
4995   b->lvec      = PETSC_NULL;
4996   b->Mvctx     = PETSC_NULL;
4997 
4998   /* stuff for MatGetRow() */
4999   b->rowindices   = 0;
5000   b->rowvalues    = 0;
5001   b->getrowactive = PETSC_FALSE;
5002 
5003 #if defined(PETSC_HAVE_SPOOLES)
5004   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
5005                                      "MatGetFactor_mpiaij_spooles",
5006                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5007 #endif
5008 #if defined(PETSC_HAVE_MUMPS)
5009   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
5010                                      "MatGetFactor_mpiaij_mumps",
5011                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5012 #endif
5013 #if defined(PETSC_HAVE_PASTIX)
5014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
5015 					   "MatGetFactor_mpiaij_pastix",
5016 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5017 #endif
5018 #if defined(PETSC_HAVE_SUPERLU_DIST)
5019   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
5020                                      "MatGetFactor_mpiaij_superlu_dist",
5021                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5022 #endif
5023   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5024                                      "MatStoreValues_MPIAIJ",
5025                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5026   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5027                                      "MatRetrieveValues_MPIAIJ",
5028                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5029   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5030 				     "MatGetDiagonalBlock_MPIAIJ",
5031                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5032   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5033 				     "MatIsTranspose_MPIAIJ",
5034 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5035   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5036 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5037 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5038   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5039 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5040 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5041   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5042 				     "MatDiagonalScaleLocal_MPIAIJ",
5043 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5044   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5045                                      "MatConvert_MPIAIJ_MPICSRPERM",
5046                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5047   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5048                                      "MatConvert_MPIAIJ_MPICRL",
5049                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5050   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5051                                      "MatMatMult_MPIDense_MPIAIJ",
5052                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5053   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5054                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5055                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5056   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5057                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5058                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5059   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5060   PetscFunctionReturn(0);
5061 }
5062 EXTERN_C_END
5063 
5064 #undef __FUNCT__
5065 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5066 /*@
5067      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5068          and "off-diagonal" part of the matrix in CSR format.
5069 
5070    Collective on MPI_Comm
5071 
5072    Input Parameters:
5073 +  comm - MPI communicator
5074 .  m - number of local rows (Cannot be PETSC_DECIDE)
5075 .  n - This value should be the same as the local size used in creating the
5076        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5077        calculated if N is given) For square matrices n is almost always m.
5078 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5079 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5080 .   i - row indices for "diagonal" portion of matrix
5081 .   j - column indices
5082 .   a - matrix values
5083 .   oi - row indices for "off-diagonal" portion of matrix
5084 .   oj - column indices
5085 -   oa - matrix values
5086 
5087    Output Parameter:
5088 .   mat - the matrix
5089 
5090    Level: advanced
5091 
5092    Notes:
5093        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5094 
5095        The i and j indices are 0 based
5096 
5097        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5098 
5099 
5100 .keywords: matrix, aij, compressed row, sparse, parallel
5101 
5102 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5103           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5104 @*/
5105 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5106 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5107 {
5108   PetscErrorCode ierr;
5109   Mat_MPIAIJ     *maij;
5110 
5111  PetscFunctionBegin;
5112   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5113   if (i[0]) {
5114     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5115   }
5116   if (oi[0]) {
5117     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5118   }
5119   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5120   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5121   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5122   maij = (Mat_MPIAIJ*) (*mat)->data;
5123   maij->donotstash     = PETSC_TRUE;
5124   (*mat)->preallocated = PETSC_TRUE;
5125 
5126   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5127   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5128   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5129   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5130 
5131   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5132   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5133 
5134   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5135   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5136   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5137   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5138 
5139   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5140   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5141   PetscFunctionReturn(0);
5142 }
5143 
5144 /*
5145     Special version for direct calls from Fortran
5146 */
5147 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5148 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5149 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5150 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5151 #endif
5152 
5153 /* Change these macros so can be used in void function */
5154 #undef CHKERRQ
5155 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5156 #undef SETERRQ2
5157 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5158 #undef SETERRQ
5159 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5160 
5161 EXTERN_C_BEGIN
5162 #undef __FUNCT__
5163 #define __FUNCT__ "matsetvaluesmpiaij_"
5164 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5165 {
5166   Mat             mat = *mmat;
5167   PetscInt        m = *mm, n = *mn;
5168   InsertMode      addv = *maddv;
5169   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5170   PetscScalar     value;
5171   PetscErrorCode  ierr;
5172 
5173   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5174   if (mat->insertmode == NOT_SET_VALUES) {
5175     mat->insertmode = addv;
5176   }
5177 #if defined(PETSC_USE_DEBUG)
5178   else if (mat->insertmode != addv) {
5179     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5180   }
5181 #endif
5182   {
5183   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5184   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5185   PetscTruth      roworiented = aij->roworiented;
5186 
5187   /* Some Variables required in the macro */
5188   Mat             A = aij->A;
5189   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5190   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5191   MatScalar       *aa = a->a;
5192   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5193   Mat             B = aij->B;
5194   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5195   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5196   MatScalar       *ba = b->a;
5197 
5198   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5199   PetscInt        nonew = a->nonew;
5200   MatScalar       *ap1,*ap2;
5201 
5202   PetscFunctionBegin;
5203   for (i=0; i<m; i++) {
5204     if (im[i] < 0) continue;
5205 #if defined(PETSC_USE_DEBUG)
5206     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5207 #endif
5208     if (im[i] >= rstart && im[i] < rend) {
5209       row      = im[i] - rstart;
5210       lastcol1 = -1;
5211       rp1      = aj + ai[row];
5212       ap1      = aa + ai[row];
5213       rmax1    = aimax[row];
5214       nrow1    = ailen[row];
5215       low1     = 0;
5216       high1    = nrow1;
5217       lastcol2 = -1;
5218       rp2      = bj + bi[row];
5219       ap2      = ba + bi[row];
5220       rmax2    = bimax[row];
5221       nrow2    = bilen[row];
5222       low2     = 0;
5223       high2    = nrow2;
5224 
5225       for (j=0; j<n; j++) {
5226         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5227         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5228         if (in[j] >= cstart && in[j] < cend){
5229           col = in[j] - cstart;
5230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5231         } else if (in[j] < 0) continue;
5232 #if defined(PETSC_USE_DEBUG)
5233         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5234 #endif
5235         else {
5236           if (mat->was_assembled) {
5237             if (!aij->colmap) {
5238               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5239             }
5240 #if defined (PETSC_USE_CTABLE)
5241             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5242 	    col--;
5243 #else
5244             col = aij->colmap[in[j]] - 1;
5245 #endif
5246             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5247               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5248               col =  in[j];
5249               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5250               B = aij->B;
5251               b = (Mat_SeqAIJ*)B->data;
5252               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5253               rp2      = bj + bi[row];
5254               ap2      = ba + bi[row];
5255               rmax2    = bimax[row];
5256               nrow2    = bilen[row];
5257               low2     = 0;
5258               high2    = nrow2;
5259               bm       = aij->B->rmap->n;
5260               ba = b->a;
5261             }
5262           } else col = in[j];
5263           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5264         }
5265       }
5266     } else {
5267       if (!aij->donotstash) {
5268         if (roworiented) {
5269           if (ignorezeroentries && v[i*n] == 0.0) continue;
5270           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5271         } else {
5272           if (ignorezeroentries && v[i] == 0.0) continue;
5273           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5274         }
5275       }
5276     }
5277   }}
5278   PetscFunctionReturnVoid();
5279 }
5280 EXTERN_C_END
5281 
5282