xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 81540f2fa6ed413bc384182845a7ebe823db191f)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
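/*
   Usage sketch (hedged: the caller-side names below, e.g. gmat, dmat and nnz, are
   illustrative and not part of this file). Process 0 holds the complete square
   SeqAIJ matrix; every process receives its m local rows:

      Mat gmat = PETSC_NULL,dmat;
      if (!rank) {
        ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,M,0,nnz,&gmat);CHKERRQ(ierr);
        (fill and assemble gmat on process 0)
      }
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      (later, to refresh only the numerical values with the same nonzero pattern)
      ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/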
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to the other processes (process 0 keeps its own) */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it, it is not scalable (each processor
181 has an order N integer array) but is fast to access.
182 */
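/*
   Example (illustrative): if this process's off-diagonal block B has
   garray = {3,7,12}, then the colmap sends global column 3 -> local column 0,
   7 -> 1 and 12 -> 2. The values are stored shifted by one (i+1) so that a
   lookup result of 0 means "global column not present in B".
*/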
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
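/*
   The two macros below insert a single value at (row,col) into the local diagonal
   (A) or off-diagonal (B) SeqAIJ block: a short binary search locates the column
   within the row, an existing entry is overwritten or added to, and otherwise the
   later entries of the row are shifted up to make room (reallocating via
   MatSeqXAIJReallocateAIJ() when the row is full). They rely on the local variables
   (rp1/ap1/..., rp2/ap2/...) set up in MatSetValues_MPIAIJ() below.
*/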
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
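  /* v[] is assumed to hold the entire local row in ascending global column order:
     the off-diagonal (B) entries left of the diagonal block, then the diagonal (A)
     block entries, then the remaining off-diagonal entries; the three PetscMemcpy()
     calls below split v accordingly between the B and A storage */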
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
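  /* For each entry: rows owned by this process are inserted directly into the local
     diagonal (A) or off-diagonal (B) block via the macros above; rows owned by other
     processes are stashed and exchanged later in MatAssemblyBegin/End(). If a new
     off-diagonal column shows up after the matrix was already assembled, the matrix
     is first disassembled so that B can accept the new nonzero. */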
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mixture */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselves, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
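  /* Communication plan: determine the owner of every global row in rows[], let each
     owner know how many rows to expect (PetscMaxSum), post the receives, send the
     row lists, and then zero the locally owned rows collected in lrows[], optionally
     re-inserting the diagonal entries */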
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal allocating
674      new memory, thus calling lots of mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
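  /* y = A_d*x_local + B_o*x_ghost: the scatter that gathers the needed off-process
     entries of x into lvec is overlapped with the multiply by the local diagonal block */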
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
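  /* y = A_d^T*x + (reverse-scattered) B_o^T*x: the off-diagonal contribution is
     computed into lvec and then added into the owning processes' parts of y; whether
     the local multiply happens before or between the scatter calls depends on whether
     the scatter's begin and end are merged */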
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added into yy until the next line */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* values were actually received in the Begin() but we still need to call this no-op End() */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: the diagonal blocks must be transposes of each other */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: the off-diagonal blocks. This requires a call to MatGetSubMatrices(). */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
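  /* File layout written by process 0: a 4-entry header (MAT_FILE_COOKIE, M, N, global
     nonzero count), then every row length, then every global column index, then every
     numerical value; the other processes ship their row lengths, indices and values to
     process 0 one process at a time */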
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap->N;
932     header[2] = mat->cmap->N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap->n;
938     range = mat->rmap->range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap->n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap->n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap->n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap->n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");
1114 
1115     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1116     if (!rank) {
1117       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1118     } else {
1119       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1120     }
1121     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1122     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1123     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1124     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1125 
1126     /* copy over the A part */
1127     Aloc = (Mat_SeqAIJ*)aij->A->data;
1128     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1129     row = mat->rmap->rstart;
1130     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1131     for (i=0; i<m; i++) {
1132       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1133       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1134     }
1135     aj = Aloc->j;
1136     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1137 
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqAIJ*)aij->B->data;
1140     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     row  = mat->rmap->rstart;
1142     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1143     ct   = cols;
1144     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1148     }
1149     ierr = PetscFree(ct);CHKERRQ(ierr);
1150     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152     /*
1153        Everyone has to participate in the call that draws the matrix since the graphics waits are
1154        synchronized across all processors that share the PetscDraw object
1155     */
1156     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1157     if (!rank) {
1158       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1159       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1160     }
1161     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1162     ierr = MatDestroy(A);CHKERRQ(ierr);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatView_MPIAIJ"
1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscErrorCode ierr;
1172   PetscTruth     iascii,isdraw,issocket,isbinary;
1173 
1174   PetscFunctionBegin;
1175   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1176   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1177   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1179   if (iascii || isdraw || isbinary || issocket) {
1180     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1181   } else {
1182     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRelax_MPIAIJ"
1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1190 {
1191   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1192   PetscErrorCode ierr;
1193   Vec            bb1;
1194 
1195   PetscFunctionBegin;
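  /* Block Jacobi with a local SOR smoother: each outer iteration gathers the
     off-process part of x into lvec, forms bb1 = bb - B*x_ghost, and then runs the
     requested SOR sweep on the local diagonal block A with bb1 as the right-hand side */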
1196   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1197 
1198   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1199     if (flag & SOR_ZERO_INITIAL_GUESS) {
1200       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1201       its--;
1202     }
1203 
1204     while (its--) {
1205       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207 
1208       /* update rhs: bb1 = bb - B*x */
1209       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1210       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1211 
1212       /* local sweep */
1213       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1214     }
1215   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1216     if (flag & SOR_ZERO_INITIAL_GUESS) {
1217       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1218       its--;
1219     }
1220     while (its--) {
1221       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224       /* update rhs: bb1 = bb - B*x */
1225       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1226       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1227 
1228       /* local sweep */
1229       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1230     }
1231   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1232     if (flag & SOR_ZERO_INITIAL_GUESS) {
1233       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1234       its--;
1235     }
1236     while (its--) {
1237       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240       /* update rhs: bb1 = bb - B*x */
1241       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1242       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1243 
1244       /* local sweep */
1245       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1246     }
1247   } else {
1248     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1249   }
1250 
1251   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows;
1261   const PetscInt *rows;
1262   int            ntids;
1263   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
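  /* Strategy: gather the complete row permutation onto every process and invert it to
     obtain this process's target rows, invert the (local) column permutation, and then
     extract the permuted matrix with a single MatGetSubMatrix() call */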
1267   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1268   /* make a collective version of 'rowp' */
1269   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1270   if (pcomm==comm) {
1271     crowp = rowp;
1272   } else {
1273     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1274     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1275     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1276     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1277   }
1278   /* collect the global row permutation and invert it */
1279   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1280   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1281   if (pcomm!=comm) {
1282     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1283   }
1284   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1285   /* get the local target indices */
1286   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1287   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1288   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1289   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1290   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1291   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1292   /* the column permutation is simpler:
1293      make a local version of 'colp' and invert it */
1294   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1295   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1296   if (ntids==1) {
1297     lcolp = colp;
1298   } else {
1299     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1300     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1301     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1302   }
1303   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1304   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1305   if (ntids>1) {
1306     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1307     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1308   }
1309   /* now we just get the submatrix */
1310   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1311   /* clean up */
1312   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1313   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
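/*
   Usage sketch (assumed caller code, not part of this file): given index sets rowp
   and colp describing row and column permutations,

       Mat B;
       ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);

   returns the permuted matrix B with the same parallel layout as A; the code above
   gathers the row permutation globally, inverts it, and extracts the local target
   rows via MatGetSubMatrix().
*/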
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1319 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1320 {
1321   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1322   Mat            A = mat->A,B = mat->B;
1323   PetscErrorCode ierr;
1324   PetscReal      isend[5],irecv[5];
1325 
1326   PetscFunctionBegin;
1327   info->block_size     = 1.0;
1328   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1329   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1330   isend[3] = info->memory;  isend[4] = info->mallocs;
1331   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1332   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1333   isend[3] += info->memory;  isend[4] += info->mallocs;
1334   if (flag == MAT_LOCAL) {
1335     info->nz_used      = isend[0];
1336     info->nz_allocated = isend[1];
1337     info->nz_unneeded  = isend[2];
1338     info->memory       = isend[3];
1339     info->mallocs      = isend[4];
1340   } else if (flag == MAT_GLOBAL_MAX) {
1341     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1342     info->nz_used      = irecv[0];
1343     info->nz_allocated = irecv[1];
1344     info->nz_unneeded  = irecv[2];
1345     info->memory       = irecv[3];
1346     info->mallocs      = irecv[4];
1347   } else if (flag == MAT_GLOBAL_SUM) {
1348     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1349     info->nz_used      = irecv[0];
1350     info->nz_allocated = irecv[1];
1351     info->nz_unneeded  = irecv[2];
1352     info->memory       = irecv[3];
1353     info->mallocs      = irecv[4];
1354   }
1355   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1356   info->fill_ratio_needed = 0;
1357   info->factor_mallocs    = 0;
1358   info->rows_global       = (double)matin->rmap->N;
1359   info->columns_global    = (double)matin->cmap->N;
1360   info->rows_local        = (double)matin->rmap->n;
1361   info->columns_local     = (double)matin->cmap->n;
1362 
1363   PetscFunctionReturn(0);
1364 }
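/*
   Usage sketch (assumed caller code): the MatInfoType flag selects how the
   per-process numbers gathered above are combined,

       MatInfo info;
       ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);  /* totals over all ranks */
       ierr = MatGetInfo(mat,MAT_GLOBAL_MAX,&info);CHKERRQ(ierr);  /* per-rank maxima */
       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);       /* this rank only */
*/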
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatSetOption_MPIAIJ"
1368 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1369 {
1370   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1371   PetscErrorCode ierr;
1372 
1373   PetscFunctionBegin;
1374   switch (op) {
1375   case MAT_NEW_NONZERO_LOCATIONS:
1376   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1377   case MAT_KEEP_ZEROED_ROWS:
1378   case MAT_NEW_NONZERO_LOCATION_ERR:
1379   case MAT_USE_INODES:
1380   case MAT_IGNORE_ZERO_ENTRIES:
1381     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1382     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1383     break;
1384   case MAT_ROW_ORIENTED:
1385     a->roworiented = flg;
1386     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1387     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1388     break;
1389   case MAT_NEW_DIAGONALS:
1390     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1391     break;
1392   case MAT_IGNORE_OFF_PROC_ENTRIES:
1393     a->donotstash = PETSC_TRUE;
1394     break;
1395   case MAT_SYMMETRIC:
1396     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1397     break;
1398   case MAT_STRUCTURALLY_SYMMETRIC:
1399   case MAT_HERMITIAN:
1400   case MAT_SYMMETRY_ETERNAL:
1401     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1402     break;
1403   default:
1404     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatGetRow_MPIAIJ"
1411 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1412 {
1413   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1414   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1415   PetscErrorCode ierr;
1416   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1417   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1418   PetscInt       *cmap,*idx_p;
1419 
1420   PetscFunctionBegin;
1421   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1422   mat->getrowactive = PETSC_TRUE;
1423 
1424   if (!mat->rowvalues && (idx || v)) {
1425     /*
1426         allocate enough space to hold information from the longest row.
1427     */
1428     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1429     PetscInt     max = 1,tmp;
1430     for (i=0; i<matin->rmap->n; i++) {
1431       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1432       if (max < tmp) { max = tmp; }
1433     }
1434     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1435     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1436   }
1437 
1438   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1439   lrow = row - rstart;
1440 
1441   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1442   if (!v)   {pvA = 0; pvB = 0;}
1443   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1444   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1445   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1446   nztot = nzA + nzB;
1447 
1448   cmap  = mat->garray;
1449   if (v  || idx) {
1450     if (nztot) {
1451       /* Sort by increasing column numbers, assuming A and B already sorted */
1452       PetscInt imark = -1;
1453       if (v) {
1454         *v = v_p = mat->rowvalues;
1455         for (i=0; i<nzB; i++) {
1456           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1457           else break;
1458         }
1459         imark = i;
1460         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1461         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1462       }
1463       if (idx) {
1464         *idx = idx_p = mat->rowindices;
1465         if (imark > -1) {
1466           for (i=0; i<imark; i++) {
1467             idx_p[i] = cmap[cworkB[i]];
1468           }
1469         } else {
1470           for (i=0; i<nzB; i++) {
1471             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1472             else break;
1473           }
1474           imark = i;
1475         }
1476         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1477         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1478       }
1479     } else {
1480       if (idx) *idx = 0;
1481       if (v)   *v   = 0;
1482     }
1483   }
1484   *nz = nztot;
1485   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1486   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1487   PetscFunctionReturn(0);
1488 }
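/*
   Usage sketch (assumed caller code; the exact const-ness of the arguments varies
   by PETSc release): only locally owned rows may be requested, and every
   MatGetRow() must be paired with MatRestoreRow(),

       PetscInt           ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* cols/vals are merged from the diagonal and off-diagonal blocks,
          sorted by increasing global column index */
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/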
1489 
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1492 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1493 {
1494   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1495 
1496   PetscFunctionBegin;
1497   if (!aij->getrowactive) {
1498     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1499   }
1500   aij->getrowactive = PETSC_FALSE;
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatNorm_MPIAIJ"
1506 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1507 {
1508   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1509   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1510   PetscErrorCode ierr;
1511   PetscInt       i,j,cstart = mat->cmap->rstart;
1512   PetscReal      sum = 0.0;
1513   MatScalar      *v;
1514 
1515   PetscFunctionBegin;
1516   if (aij->size == 1) {
1517     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1518   } else {
1519     if (type == NORM_FROBENIUS) {
1520       v = amat->a;
1521       for (i=0; i<amat->nz; i++) {
1522 #if defined(PETSC_USE_COMPLEX)
1523         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1524 #else
1525         sum += (*v)*(*v); v++;
1526 #endif
1527       }
1528       v = bmat->a;
1529       for (i=0; i<bmat->nz; i++) {
1530 #if defined(PETSC_USE_COMPLEX)
1531         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1532 #else
1533         sum += (*v)*(*v); v++;
1534 #endif
1535       }
1536       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1537       *norm = sqrt(*norm);
1538     } else if (type == NORM_1) { /* max column norm */
1539       PetscReal *tmp,*tmp2;
1540       PetscInt  *jj,*garray = aij->garray;
1541       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1542       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1543       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1544       *norm = 0.0;
1545       v = amat->a; jj = amat->j;
1546       for (j=0; j<amat->nz; j++) {
1547         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1548       }
1549       v = bmat->a; jj = bmat->j;
1550       for (j=0; j<bmat->nz; j++) {
1551         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1552       }
1553       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1554       for (j=0; j<mat->cmap->N; j++) {
1555         if (tmp2[j] > *norm) *norm = tmp2[j];
1556       }
1557       ierr = PetscFree(tmp);CHKERRQ(ierr);
1558       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1559     } else if (type == NORM_INFINITY) { /* max row norm */
1560       PetscReal ntemp = 0.0;
1561       for (j=0; j<aij->A->rmap->n; j++) {
1562         v = amat->a + amat->i[j];
1563         sum = 0.0;
1564         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1565           sum += PetscAbsScalar(*v); v++;
1566         }
1567         v = bmat->a + bmat->i[j];
1568         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1569           sum += PetscAbsScalar(*v); v++;
1570         }
1571         if (sum > ntemp) ntemp = sum;
1572       }
1573       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1574     } else {
1575       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1576     }
1577   }
1578   PetscFunctionReturn(0);
1579 }
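/*
   Added commentary: the three supported norms are assembled from per-process data
   and then reduced across the communicator:
     NORM_FROBENIUS: sqrt( sum_ij |a_ij|^2 )             (MPI_SUM of local squares)
     NORM_1        : max_j sum_i |a_ij|   (column sums)  (MPI_SUM of column sums, then max)
     NORM_INFINITY : max_i sum_j |a_ij|   (row sums)     (MPI_MAX of local row maxima)
   The 2-norm would require an eigenvalue computation and is not supported here.
*/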
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatTranspose_MPIAIJ"
1583 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1584 {
1585   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1586   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1587   PetscErrorCode ierr;
1588   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1589   PetscInt       cstart=A->cmap->rstart,ncol;
1590   Mat            B;
1591   MatScalar      *array;
1592 
1593   PetscFunctionBegin;
1594   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1595 
1596   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1597   ai = Aloc->i; aj = Aloc->j;
1598   bi = Bloc->i; bj = Bloc->j;
1599   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1600     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1601     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1602     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1603     for (i=0; i<ai[ma]; i++){
1604       d_nnz[aj[i]] ++;
1605       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1606     }
1607 
1608     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1609     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1610     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1611     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1612     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1613   } else {
1614     B = *matout;
1615   }
1616 
1617   /* copy over the A part */
1618   array = Aloc->a;
1619   row = A->rmap->rstart;
1620   for (i=0; i<ma; i++) {
1621     ncol = ai[i+1]-ai[i];
1622     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1623     row++; array += ncol; aj += ncol;
1624   }
1625   aj = Aloc->j;
1626   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1627 
1628   /* copy over the B part */
1629   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1630   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1631   array = Bloc->a;
1632   row = A->rmap->rstart;
1633   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1634   cols_tmp = cols;
1635   for (i=0; i<mb; i++) {
1636     ncol = bi[i+1]-bi[i];
1637     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1638     row++; array += ncol; cols_tmp += ncol;
1639   }
1640   ierr = PetscFree(cols);CHKERRQ(ierr);
1641 
1642   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1645     *matout = B;
1646   } else {
1647     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1648   }
1649   PetscFunctionReturn(0);
1650 }
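/*
   Usage sketch (assumed caller code):

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   In-place transposition (MAT_REUSE_MATRIX with matout pointing back at A) is only
   permitted for square matrices, as checked at the top of the routine.
*/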
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1654 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1655 {
1656   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1657   Mat            a = aij->A,b = aij->B;
1658   PetscErrorCode ierr;
1659   PetscInt       s1,s2,s3;
1660 
1661   PetscFunctionBegin;
1662   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1663   if (rr) {
1664     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1665     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1666     /* Overlap communication with computation. */
1667     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1668   }
1669   if (ll) {
1670     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1671     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1672     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1673   }
1674   /* scale the diagonal block */
1675   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1676 
1677   if (rr) {
1678     /* Do a scatter end and then right scale the off-diagonal block */
1679     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1680     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1681   }
1682 
1683   PetscFunctionReturn(0);
1684 }
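/*
   Added commentary: this computes mat = diag(ll) * mat * diag(rr).  The diagonal
   block is scaled with purely local data, while right-scaling the off-diagonal
   block requires the entries of rr owned by other processes; hence rr is scattered
   into aij->lvec and that communication is overlapped with the left scaling.
*/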
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1688 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1689 {
1690   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1691   PetscErrorCode ierr;
1692 
1693   PetscFunctionBegin;
1694   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1695   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1700 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1701 {
1702   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1703   PetscErrorCode ierr;
1704 
1705   PetscFunctionBegin;
1706   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatEqual_MPIAIJ"
1712 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1713 {
1714   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1715   Mat            a,b,c,d;
1716   PetscTruth     flg;
1717   PetscErrorCode ierr;
1718 
1719   PetscFunctionBegin;
1720   a = matA->A; b = matA->B;
1721   c = matB->A; d = matB->B;
1722 
1723   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1724   if (flg) {
1725     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1726   }
1727   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatCopy_MPIAIJ"
1733 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1734 {
1735   PetscErrorCode ierr;
1736   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1737   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1738 
1739   PetscFunctionBegin;
1740   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1741   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1742     /* Because of the column compression in the off-processor part of the matrix a->B,
1743        the number of columns in a->B and b->B may differ, hence we cannot call
1744        MatCopy() directly on the two parts. If need be, a copy more efficient than
1745        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1746        then copying the submatrices */
1747     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1748   } else {
1749     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1750     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1751   }
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1757 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1758 {
1759   PetscErrorCode ierr;
1760 
1761   PetscFunctionBegin;
1762   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 #include "petscblaslapack.h"
1767 #undef __FUNCT__
1768 #define __FUNCT__ "MatAXPY_MPIAIJ"
1769 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1770 {
1771   PetscErrorCode ierr;
1772   PetscInt       i;
1773   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1774   PetscBLASInt   bnz,one=1;
1775   Mat_SeqAIJ     *x,*y;
1776 
1777   PetscFunctionBegin;
1778   if (str == SAME_NONZERO_PATTERN) {
1779     PetscScalar alpha = a;
1780     x = (Mat_SeqAIJ *)xx->A->data;
1781     y = (Mat_SeqAIJ *)yy->A->data;
1782     bnz = PetscBLASIntCast(x->nz);
1783     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1784     x = (Mat_SeqAIJ *)xx->B->data;
1785     y = (Mat_SeqAIJ *)yy->B->data;
1786     bnz = PetscBLASIntCast(x->nz);
1787     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1788   } else if (str == SUBSET_NONZERO_PATTERN) {
1789     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1790 
1791     x = (Mat_SeqAIJ *)xx->B->data;
1792     y = (Mat_SeqAIJ *)yy->B->data;
1793     if (y->xtoy && y->XtoY != xx->B) {
1794       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1795       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1796     }
1797     if (!y->xtoy) { /* get xtoy */
1798       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1799       y->XtoY = xx->B;
1800       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1801     }
1802     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1803   } else {
1804     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1805   }
1806   PetscFunctionReturn(0);
1807 }
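/*
   Usage sketch (assumed caller code): Y <- a*X + Y,

       ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   With SAME_NONZERO_PATTERN the update is a BLAS axpy on the stored value arrays of
   the diagonal and off-diagonal blocks; SUBSET_NONZERO_PATTERN builds (and caches)
   an index translation xtoy, and all other cases fall back to MatAXPY_Basic().
*/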
1808 
1809 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatConjugate_MPIAIJ"
1813 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1814 {
1815 #if defined(PETSC_USE_COMPLEX)
1816   PetscErrorCode ierr;
1817   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1818 
1819   PetscFunctionBegin;
1820   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1821   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1822 #else
1823   PetscFunctionBegin;
1824 #endif
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatRealPart_MPIAIJ"
1830 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1831 {
1832   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1833   PetscErrorCode ierr;
1834 
1835   PetscFunctionBegin;
1836   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1837   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1843 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1844 {
1845   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1846   PetscErrorCode ierr;
1847 
1848   PetscFunctionBegin;
1849   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1850   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 #ifdef PETSC_HAVE_PBGL
1855 
1856 #include <boost/parallel/mpi/bsp_process_group.hpp>
1857 #include <boost/graph/distributed/ilu_default_graph.hpp>
1858 #include <boost/graph/distributed/ilu_0_block.hpp>
1859 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1860 #include <boost/graph/distributed/petsc/interface.hpp>
1861 #include <boost/multi_array.hpp>
1862 #include <boost/parallel/distributed_property_map.hpp>
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1866 /*
1867   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1868 */
1869 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1870 {
1871   namespace petsc = boost::distributed::petsc;
1872 
1873   namespace graph_dist = boost::graph::distributed;
1874   using boost::graph::distributed::ilu_default::process_group_type;
1875   using boost::graph::ilu_permuted;
1876 
1877   PetscTruth      row_identity, col_identity;
1878   PetscContainer  c;
1879   PetscInt        m, n, M, N;
1880   PetscErrorCode  ierr;
1881 
1882   PetscFunctionBegin;
1883   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1884   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1885   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1886   if (!row_identity || !col_identity) {
1887     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1888   }
1889 
1890   process_group_type pg;
1891   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1892   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1893   lgraph_type&   level_graph = *lgraph_p;
1894   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1895 
1896   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1897   ilu_permuted(level_graph);
1898 
1899   /* put together the new matrix */
1900   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1901   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1902   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1903   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1904   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1905   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1906   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1907 
1908   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1909   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1910   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1916 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1917 {
1918   PetscFunctionBegin;
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatSolve_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1928 {
1929   namespace graph_dist = boost::graph::distributed;
1930 
1931   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1932   lgraph_type*   lgraph_p;
1933   PetscContainer c;
1934   PetscErrorCode ierr;
1935 
1936   PetscFunctionBegin;
1937   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1938   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1939   ierr = VecCopy(b, x); CHKERRQ(ierr);
1940 
1941   PetscScalar* array_x;
1942   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1943   PetscInt sx;
1944   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1945 
1946   PetscScalar* array_b;
1947   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1948   PetscInt sb;
1949   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1950 
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1955   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1956                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1957 
1958   typedef boost::iterator_property_map<array_ref_type::iterator,
1959                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1960   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1961                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1962 
1963   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1964 
       /* return the vector arrays obtained with VecGetArray() above */
       ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr);
       ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 #endif
1968 
1969 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1970   PetscInt       nzlocal,nsends,nrecvs;
1971   PetscMPIInt    *send_rank;
1972   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1973   PetscScalar    *sbuf_a,**rbuf_a;
1974   PetscErrorCode (*MatDestroy)(Mat);
1975 } Mat_Redundant;
1976 
1977 #undef __FUNCT__
1978 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1979 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1980 {
1981   PetscErrorCode       ierr;
1982   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1983   PetscInt             i;
1984 
1985   PetscFunctionBegin;
1986   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1987   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1988   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1989   for (i=0; i<redund->nrecvs; i++){
1990     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1991     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1992   }
1993   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1994   ierr = PetscFree(redund);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatDestroy_MatRedundant"
2000 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2001 {
2002   PetscErrorCode  ierr;
2003   PetscContainer  container;
2004   Mat_Redundant   *redund=PETSC_NULL;
2005 
2006   PetscFunctionBegin;
2007   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2008   if (container) {
2009     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2010   } else {
2011     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2012   }
2013   A->ops->destroy = redund->MatDestroy;
2014   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2015   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2016   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2022 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2023 {
2024   PetscMPIInt    rank,size;
2025   MPI_Comm       comm=((PetscObject)mat)->comm;
2026   PetscErrorCode ierr;
2027   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2028   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2029   PetscInt       *rowrange=mat->rmap->range;
2030   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2031   Mat            A=aij->A,B=aij->B,C=*matredundant;
2032   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2033   PetscScalar    *sbuf_a;
2034   PetscInt       nzlocal=a->nz+b->nz;
2035   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2036   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2037   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2038   MatScalar      *aworkA,*aworkB;
2039   PetscScalar    *vals;
2040   PetscMPIInt    tag1,tag2,tag3,imdex;
2041   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2042                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2043   MPI_Status     recv_status,*send_status;
2044   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2045   PetscInt       **rbuf_j=PETSC_NULL;
2046   PetscScalar    **rbuf_a=PETSC_NULL;
2047   Mat_Redundant  *redund=PETSC_NULL;
2048   PetscContainer container;
2049 
2050   PetscFunctionBegin;
2051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2052   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2053 
2054   if (reuse == MAT_REUSE_MATRIX) {
2055     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2056     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2057     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2058     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2059     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2060     if (container) {
2061       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2062     } else {
2063       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2064     }
2065     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2066 
2067     nsends    = redund->nsends;
2068     nrecvs    = redund->nrecvs;
2069     send_rank = redund->send_rank; recv_rank = send_rank + size;
2070     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2071     sbuf_j    = redund->sbuf_j;
2072     sbuf_a    = redund->sbuf_a;
2073     rbuf_j    = redund->rbuf_j;
2074     rbuf_a    = redund->rbuf_a;
2075   }
2076 
2077   if (reuse == MAT_INITIAL_MATRIX){
2078     PetscMPIInt  subrank,subsize;
2079     PetscInt     nleftover,np_subcomm;
2080     /* get the destination processors' id send_rank, nsends and nrecvs */
2081     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2082     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2083     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2084     recv_rank = send_rank + size;
2085     np_subcomm = size/nsubcomm;
2086     nleftover  = size - nsubcomm*np_subcomm;
2087     nsends = 0; nrecvs = 0;
2088     for (i=0; i<size; i++){ /* i=rank*/
2089       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2090         send_rank[nsends] = i; nsends++;
2091         recv_rank[nrecvs++] = i;
2092       }
2093     }
2094     if (rank >= size - nleftover){/* this proc is a leftover processor */
2095       i = size-nleftover-1;
2096       j = 0;
2097       while (j < nsubcomm - nleftover){
2098         send_rank[nsends++] = i;
2099         i--; j++;
2100       }
2101     }
2102 
2103     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2104       for (i=0; i<nleftover; i++){
2105         recv_rank[nrecvs++] = size-nleftover+i;
2106       }
2107     }
2108 
2109     /* allocate sbuf_j, sbuf_a */
2110     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2111     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2112     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2113   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2114 
2115   /* copy mat's local entries into the buffers */
2116   if (reuse == MAT_INITIAL_MATRIX){
2117     rownz_max = 0;
2118     rptr = sbuf_j;
2119     cols = sbuf_j + rend-rstart + 1;
2120     vals = sbuf_a;
2121     rptr[0] = 0;
2122     for (i=0; i<rend-rstart; i++){
2123       row = i + rstart;
2124       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2125       ncols  = nzA + nzB;
2126       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2127       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2128       /* load the column indices for this row into cols */
2129       lwrite = 0;
2130       for (l=0; l<nzB; l++) {
2131         if ((ctmp = bmap[cworkB[l]]) < cstart){
2132           vals[lwrite]   = aworkB[l];
2133           cols[lwrite++] = ctmp;
2134         }
2135       }
2136       for (l=0; l<nzA; l++){
2137         vals[lwrite]   = aworkA[l];
2138         cols[lwrite++] = cstart + cworkA[l];
2139       }
2140       for (l=0; l<nzB; l++) {
2141         if ((ctmp = bmap[cworkB[l]]) >= cend){
2142           vals[lwrite]   = aworkB[l];
2143           cols[lwrite++] = ctmp;
2144         }
2145       }
2146       vals += ncols;
2147       cols += ncols;
2148       rptr[i+1] = rptr[i] + ncols;
2149       if (rownz_max < ncols) rownz_max = ncols;
2150     }
2151     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2152   } else { /* only copy matrix values into sbuf_a */
2153     rptr = sbuf_j;
2154     vals = sbuf_a;
2155     rptr[0] = 0;
2156     for (i=0; i<rend-rstart; i++){
2157       row = i + rstart;
2158       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2159       ncols  = nzA + nzB;
2160       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2161       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2162       lwrite = 0;
2163       for (l=0; l<nzB; l++) {
2164         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2165       }
2166       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2167       for (l=0; l<nzB; l++) {
2168         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2169       }
2170       vals += ncols;
2171       rptr[i+1] = rptr[i] + ncols;
2172     }
2173   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2174 
2175   /* send nzlocal to others, and recv other's nzlocal */
2176   /*--------------------------------------------------*/
2177   if (reuse == MAT_INITIAL_MATRIX){
2178     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2179     s_waits2 = s_waits3 + nsends;
2180     s_waits1 = s_waits2 + nsends;
2181     r_waits1 = s_waits1 + nsends;
2182     r_waits2 = r_waits1 + nrecvs;
2183     r_waits3 = r_waits2 + nrecvs;
2184   } else {
2185     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2186     r_waits3 = s_waits3 + nsends;
2187   }
2188 
2189   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2190   if (reuse == MAT_INITIAL_MATRIX){
2191     /* get new tags to keep the communication clean */
2192     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2193     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2194     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2195     rbuf_nz = sbuf_nz + nsends;
2196 
2197     /* post receives of other's nzlocal */
2198     for (i=0; i<nrecvs; i++){
2199       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2200     }
2201     /* send nzlocal to others */
2202     for (i=0; i<nsends; i++){
2203       sbuf_nz[i] = nzlocal;
2204       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2205     }
2206     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2207     count = nrecvs;
2208     while (count) {
2209       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2210       recv_rank[imdex] = recv_status.MPI_SOURCE;
2211       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2212       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2213 
2214       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2215       rbuf_nz[imdex] += i + 2;
2216       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2217       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2218       count--;
2219     }
2220     /* wait on sends of nzlocal */
2221     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2222     /* send mat->i,j to others, and recv from the others */
2223     /*------------------------------------------------*/
2224     for (i=0; i<nsends; i++){
2225       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2226       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2227     }
2228     /* wait on receives of mat->i,j */
2229     /*------------------------------*/
2230     count = nrecvs;
2231     while (count) {
2232       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2233       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2234       count--;
2235     }
2236     /* wait on sends of mat->i,j */
2237     /*---------------------------*/
2238     if (nsends) {
2239       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2240     }
2241   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2242 
2243   /* post receives, send and receive mat->a */
2244   /*----------------------------------------*/
2245   for (imdex=0; imdex<nrecvs; imdex++) {
2246     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2247   }
2248   for (i=0; i<nsends; i++){
2249     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2250   }
2251   count = nrecvs;
2252   while (count) {
2253     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2254     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2255     count--;
2256   }
2257   if (nsends) {
2258     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2259   }
2260 
2261   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2262 
2263   /* create redundant matrix */
2264   /*-------------------------*/
2265   if (reuse == MAT_INITIAL_MATRIX){
2266     /* compute rownz_max for preallocation */
2267     for (imdex=0; imdex<nrecvs; imdex++){
2268       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2269       rptr = rbuf_j[imdex];
2270       for (i=0; i<j; i++){
2271         ncols = rptr[i+1] - rptr[i];
2272         if (rownz_max < ncols) rownz_max = ncols;
2273       }
2274     }
2275 
2276     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2277     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2278     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2279     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2280     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2281   } else {
2282     C = *matredundant;
2283   }
2284 
2285   /* insert local matrix entries */
2286   rptr = sbuf_j;
2287   cols = sbuf_j + rend-rstart + 1;
2288   vals = sbuf_a;
2289   for (i=0; i<rend-rstart; i++){
2290     row   = i + rstart;
2291     ncols = rptr[i+1] - rptr[i];
2292     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2293     vals += ncols;
2294     cols += ncols;
2295   }
2296   /* insert received matrix entries */
2297   for (imdex=0; imdex<nrecvs; imdex++){
2298     rstart = rowrange[recv_rank[imdex]];
2299     rend   = rowrange[recv_rank[imdex]+1];
2300     rptr = rbuf_j[imdex];
2301     cols = rbuf_j[imdex] + rend-rstart + 1;
2302     vals = rbuf_a[imdex];
2303     for (i=0; i<rend-rstart; i++){
2304       row   = i + rstart;
2305       ncols = rptr[i+1] - rptr[i];
2306       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2307       vals += ncols;
2308       cols += ncols;
2309     }
2310   }
2311   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2312   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2313   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2314   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2315   if (reuse == MAT_INITIAL_MATRIX){
2316     PetscContainer container;
2317     *matredundant = C;
2318     /* create a supporting struct and attach it to C for reuse */
2319     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2320     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2321     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2322     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2323     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2324 
2325     redund->nzlocal = nzlocal;
2326     redund->nsends  = nsends;
2327     redund->nrecvs  = nrecvs;
2328     redund->send_rank = send_rank;
2329     redund->sbuf_nz = sbuf_nz;
2330     redund->sbuf_j  = sbuf_j;
2331     redund->sbuf_a  = sbuf_a;
2332     redund->rbuf_j  = rbuf_j;
2333     redund->rbuf_a  = rbuf_a;
2334 
2335     redund->MatDestroy = C->ops->destroy;
2336     C->ops->destroy    = MatDestroy_MatRedundant;
2337   }
2338   PetscFunctionReturn(0);
2339 }
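/*
   Added commentary: every subcommunicator ends up holding a complete copy of the
   global matrix; rows owned by processes outside a subcommunicator are shipped in
   with the nonblocking sends/receives above, and the Mat_Redundant container caches
   the communication buffers so that MAT_REUSE_MATRIX only resends the values.
   Hedged usage sketch (caller code, argument names assumed):

       Mat Cred;
       ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
*/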
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2343 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2344 {
2345   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2346   PetscErrorCode ierr;
2347   PetscInt       i,*idxb = 0;
2348   PetscScalar    *va,*vb;
2349   Vec            vtmp;
2350 
2351   PetscFunctionBegin;
2352   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2353   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2354   if (idx) {
2355     for (i=0; i<A->rmap->n; i++) { /* v and idx have one entry per local row */
2356       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2357     }
2358   }
2359 
2360   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2361   if (idx) {
2362     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2363   }
2364   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2365   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2366 
2367   for (i=0; i<A->rmap->n; i++){
2368     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2369       va[i] = vb[i];
2370       if (idx) idx[i] = a->garray[idxb[i]];
2371     }
2372   }
2373 
2374   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2375   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2376   if (idxb) {
2377     ierr = PetscFree(idxb);CHKERRQ(ierr);
2378   }
2379   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
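/*
   Added commentary: the row-wise maximum of |a_ij| is formed in two passes -- first
   over the diagonal block (whose local column indices are shifted by cmap->rstart
   to global numbering), then over the off-diagonal block, whose indices are mapped
   to global columns through a->garray before the comparison.
*/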
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2385 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389   PetscInt       i,*idxb = 0;
2390   PetscScalar    *va,*vb;
2391   Vec            vtmp;
2392 
2393   PetscFunctionBegin;
2394   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2395   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2396   if (idx) {
2397     for (i=0; i<A->rmap->n; i++) { /* v and idx have one entry per local row */
2398       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2399     }
2400   }
2401 
2402   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2403   if (idx) {
2404     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2405   }
2406   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2407   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2408 
2409   for (i=0; i<A->rmap->n; i++){
2410     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2411       va[i] = vb[i];
2412       if (idx) idx[i] = a->garray[idxb[i]];
2413     }
2414   }
2415 
2416   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2417   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2418   if (idxb) {
2419     ierr = PetscFree(idxb);CHKERRQ(ierr);
2420   }
2421   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2427 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2428 {
2429   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2430   PetscInt       n      = A->rmap->n;
2431   PetscInt       cstart = A->cmap->rstart;
2432   PetscInt      *cmap   = mat->garray;
2433   PetscInt      *diagIdx, *offdiagIdx;
2434   Vec            diagV, offdiagV;
2435   PetscScalar   *a, *diagA, *offdiagA;
2436   PetscInt       r;
2437   PetscErrorCode ierr;
2438 
2439   PetscFunctionBegin;
2440   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2441   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2442   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2443   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2444   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2445   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2446   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2447   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2448   for(r = 0; r < n; ++r) {
2449     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2450       a[r]   = diagA[r];
2451       idx[r] = cstart + diagIdx[r];
2452     } else {
2453       a[r]   = offdiagA[r];
2454       idx[r] = cmap[offdiagIdx[r]];
2455     }
2456   }
2457   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2458   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2459   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2460   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2461   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2462   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2468 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2469 {
2470   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2471   PetscInt       n      = A->rmap->n;
2472   PetscInt       cstart = A->cmap->rstart;
2473   PetscInt      *cmap   = mat->garray;
2474   PetscInt      *diagIdx, *offdiagIdx;
2475   Vec            diagV, offdiagV;
2476   PetscScalar   *a, *diagA, *offdiagA;
2477   PetscInt       r;
2478   PetscErrorCode ierr;
2479 
2480   PetscFunctionBegin;
2481   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2482   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2483   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2484   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2485   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2486   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2487   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2488   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2489   for(r = 0; r < n; ++r) {
2490     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2491       a[r]   = diagA[r];
2492       idx[r] = cstart + diagIdx[r];
2493     } else {
2494       a[r]   = offdiagA[r];
2495       idx[r] = cmap[offdiagIdx[r]];
2496     }
2497   }
2498   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2499   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2500   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2501   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2502   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2503   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 #undef __FUNCT__
2508 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2509 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2510 {
2511   PetscErrorCode ierr;
2512 
2513   PetscFunctionBegin;
2514   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 /* -------------------------------------------------------------------*/
2519 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2520        MatGetRow_MPIAIJ,
2521        MatRestoreRow_MPIAIJ,
2522        MatMult_MPIAIJ,
2523 /* 4*/ MatMultAdd_MPIAIJ,
2524        MatMultTranspose_MPIAIJ,
2525        MatMultTransposeAdd_MPIAIJ,
2526 #ifdef PETSC_HAVE_PBGL
2527        MatSolve_MPIAIJ,
2528 #else
2529        0,
2530 #endif
2531        0,
2532        0,
2533 /*10*/ 0,
2534        0,
2535        0,
2536        MatRelax_MPIAIJ,
2537        MatTranspose_MPIAIJ,
2538 /*15*/ MatGetInfo_MPIAIJ,
2539        MatEqual_MPIAIJ,
2540        MatGetDiagonal_MPIAIJ,
2541        MatDiagonalScale_MPIAIJ,
2542        MatNorm_MPIAIJ,
2543 /*20*/ MatAssemblyBegin_MPIAIJ,
2544        MatAssemblyEnd_MPIAIJ,
2545        0,
2546        MatSetOption_MPIAIJ,
2547        MatZeroEntries_MPIAIJ,
2548 /*25*/ MatZeroRows_MPIAIJ,
2549        0,
2550 #ifdef PETSC_HAVE_PBGL
2551        0,
2552 #else
2553        0,
2554 #endif
2555        0,
2556        0,
2557 /*30*/ MatSetUpPreallocation_MPIAIJ,
2558 #ifdef PETSC_HAVE_PBGL
2559        0,
2560 #else
2561        0,
2562 #endif
2563        0,
2564        0,
2565        0,
2566 /*35*/ MatDuplicate_MPIAIJ,
2567        0,
2568        0,
2569        0,
2570        0,
2571 /*40*/ MatAXPY_MPIAIJ,
2572        MatGetSubMatrices_MPIAIJ,
2573        MatIncreaseOverlap_MPIAIJ,
2574        MatGetValues_MPIAIJ,
2575        MatCopy_MPIAIJ,
2576 /*45*/ MatGetRowMax_MPIAIJ,
2577        MatScale_MPIAIJ,
2578        0,
2579        0,
2580        0,
2581 /*50*/ MatSetBlockSize_MPIAIJ,
2582        0,
2583        0,
2584        0,
2585        0,
2586 /*55*/ MatFDColoringCreate_MPIAIJ,
2587        0,
2588        MatSetUnfactored_MPIAIJ,
2589        MatPermute_MPIAIJ,
2590        0,
2591 /*60*/ MatGetSubMatrix_MPIAIJ,
2592        MatDestroy_MPIAIJ,
2593        MatView_MPIAIJ,
2594        0,
2595        0,
2596 /*65*/ 0,
2597        0,
2598        0,
2599        0,
2600        0,
2601 /*70*/ MatGetRowMaxAbs_MPIAIJ,
2602        MatGetRowMinAbs_MPIAIJ,
2603        0,
2604        MatSetColoring_MPIAIJ,
2605 #if defined(PETSC_HAVE_ADIC)
2606        MatSetValuesAdic_MPIAIJ,
2607 #else
2608        0,
2609 #endif
2610        MatSetValuesAdifor_MPIAIJ,
2611 /*75*/ 0,
2612        0,
2613        0,
2614        0,
2615        0,
2616 /*80*/ 0,
2617        0,
2618        0,
2619 /*84*/ MatLoad_MPIAIJ,
2620        0,
2621        0,
2622        0,
2623        0,
2624        0,
2625 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2626        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2627        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2628        MatPtAP_Basic,
2629        MatPtAPSymbolic_MPIAIJ,
2630 /*95*/ MatPtAPNumeric_MPIAIJ,
2631        0,
2632        0,
2633        0,
2634        0,
2635 /*100*/0,
2636        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2637        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2638        MatConjugate_MPIAIJ,
2639        0,
2640 /*105*/MatSetValuesRow_MPIAIJ,
2641        MatRealPart_MPIAIJ,
2642        MatImaginaryPart_MPIAIJ,
2643        0,
2644        0,
2645 /*110*/0,
2646        MatGetRedundantMatrix_MPIAIJ,
2647        MatGetRowMin_MPIAIJ,
2648        0,
2649        0,
2650 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2651 
2652 /* ----------------------------------------------------------------------------------------*/
2653 
2654 EXTERN_C_BEGIN
2655 #undef __FUNCT__
2656 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2657 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2658 {
2659   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2660   PetscErrorCode ierr;
2661 
2662   PetscFunctionBegin;
2663   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2664   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2665   PetscFunctionReturn(0);
2666 }
2667 EXTERN_C_END
2668 
2669 EXTERN_C_BEGIN
2670 #undef __FUNCT__
2671 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2672 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2673 {
2674   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2675   PetscErrorCode ierr;
2676 
2677   PetscFunctionBegin;
2678   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2679   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2680   PetscFunctionReturn(0);
2681 }
2682 EXTERN_C_END
2683 
2684 #include "petscpc.h"
2685 EXTERN_C_BEGIN
2686 #undef __FUNCT__
2687 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2688 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2689 {
2690   Mat_MPIAIJ     *b;
2691   PetscErrorCode ierr;
2692   PetscInt       i;
2693 
2694   PetscFunctionBegin;
2695   B->preallocated = PETSC_TRUE;
2696   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2697   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2698   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2699   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2700 
2701   B->rmap->bs = B->cmap->bs = 1;
2702   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2703   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2704   if (d_nnz) {
2705     for (i=0; i<B->rmap->n; i++) {
2706       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2707     }
2708   }
2709   if (o_nnz) {
2710     for (i=0; i<B->rmap->n; i++) {
2711       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2712     }
2713   }
2714   b = (Mat_MPIAIJ*)B->data;
2715 
2716   /* Explicitly create 2 MATSEQAIJ matrices. */
2717   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2718   ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2719   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2720   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2721   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2722   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2723   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2724   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2725 
2726   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2727   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2728 
2729   PetscFunctionReturn(0);
2730 }
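/*
   Preallocation usage sketch (assumed caller code):

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   d_nz/d_nnz preallocate the diagonal block b->A (columns within this process's own
   column range) and o_nz/o_nnz the off-diagonal block b->B; accurate counts avoid
   the costly reallocations otherwise triggered during MatSetValues().
*/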
2731 EXTERN_C_END
2732 
2733 #undef __FUNCT__
2734 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2735 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2736 {
2737   Mat            mat;
2738   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2739   PetscErrorCode ierr;
2740 
2741   PetscFunctionBegin;
2742   *newmat       = 0;
2743   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2744   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2745   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2746   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2747   a    = (Mat_MPIAIJ*)mat->data;
2748 
2749   mat->factor       = matin->factor;
2750   mat->rmap->bs      = matin->rmap->bs;
2751   mat->assembled    = PETSC_TRUE;
2752   mat->insertmode   = NOT_SET_VALUES;
2753   mat->preallocated = PETSC_TRUE;
2754 
2755   a->size           = oldmat->size;
2756   a->rank           = oldmat->rank;
2757   a->donotstash     = oldmat->donotstash;
2758   a->roworiented    = oldmat->roworiented;
2759   a->rowindices     = 0;
2760   a->rowvalues      = 0;
2761   a->getrowactive   = PETSC_FALSE;
2762 
2763   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2764   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2765 
2766   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2767   if (oldmat->colmap) {
2768 #if defined (PETSC_USE_CTABLE)
2769     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2770 #else
2771     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2772     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2773     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2774 #endif
2775   } else a->colmap = 0;
2776   if (oldmat->garray) {
2777     PetscInt len;
2778     len  = oldmat->B->cmap->n;
2779     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2780     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2781     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2782   } else a->garray = 0;
2783 
2784   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2785   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2786   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2787   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2788   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2789   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2790   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2791   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2792   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2793   *newmat = mat;
2794   PetscFunctionReturn(0);
2795 }
2796 
2797 #include "petscsys.h"
2798 
2799 #undef __FUNCT__
2800 #define __FUNCT__ "MatLoad_MPIAIJ"
2801 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2802 {
2803   Mat            A;
2804   PetscScalar    *vals,*svals;
2805   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2806   MPI_Status     status;
2807   PetscErrorCode ierr;
2808   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2809   PetscInt       i,nz,j,rstart,rend,mmax;
2810   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2811   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2812   PetscInt       cend,cstart,n,*rowners;
2813   int            fd;
2814 
2815   PetscFunctionBegin;
2816   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2817   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2818   if (!rank) {
2819     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2820     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2821     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2822   }
2823 
2824   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2825   M = header[1]; N = header[2];
2826   /* determine ownership of all rows */
2827   m    = M/size + ((M % size) > rank);
2828   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2829   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2830 
2831   /* First process needs enough room for process with most rows */
2832   if (!rank) {
2833     mmax       = rowners[1];
2834     for (i=2; i<size; i++) {
2835       mmax = PetscMax(mmax,rowners[i]);
2836     }
2837   } else mmax = m;
2838 
2839   rowners[0] = 0;
2840   for (i=2; i<=size; i++) {
2841     rowners[i] += rowners[i-1];
2842   }
2843   rstart = rowners[rank];
2844   rend   = rowners[rank+1];
2845 
2846   /* distribute row lengths to all processors */
2847   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2848   if (!rank) {
2849     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2850     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2851     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2852     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2853     for (j=0; j<m; j++) {
2854       procsnz[0] += ourlens[j];
2855     }
2856     for (i=1; i<size; i++) {
2857       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2858       /* calculate the number of nonzeros on each processor */
2859       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2860         procsnz[i] += rowlengths[j];
2861       }
2862       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2863     }
2864     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2865   } else {
2866     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2867   }
2868 
2869   if (!rank) {
2870     /* determine max buffer needed and allocate it */
2871     maxnz = 0;
2872     for (i=0; i<size; i++) {
2873       maxnz = PetscMax(maxnz,procsnz[i]);
2874     }
2875     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2876 
2877     /* read in my part of the matrix column indices  */
2878     nz   = procsnz[0];
2879     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2880     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2881 
2882     /* read in everyone else's and ship off */
2883     for (i=1; i<size; i++) {
2884       nz   = procsnz[i];
2885       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2886       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2887     }
2888     ierr = PetscFree(cols);CHKERRQ(ierr);
2889   } else {
2890     /* determine buffer space needed for message */
2891     nz = 0;
2892     for (i=0; i<m; i++) {
2893       nz += ourlens[i];
2894     }
2895     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2896 
2897     /* receive message of column indices*/
2898     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2899     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2900     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2901   }
2902 
2903   /* determine column ownership if matrix is not square */
2904   if (N != M) {
2905     n      = N/size + ((N % size) > rank);
2906     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2907     cstart = cend - n;
2908   } else {
2909     cstart = rstart;
2910     cend   = rend;
2911     n      = cend - cstart;
2912   }
2913 
2914   /* loop over local rows, determining number of off diagonal entries */
2915   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2916   jj = 0;
2917   for (i=0; i<m; i++) {
2918     for (j=0; j<ourlens[i]; j++) {
2919       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2920       jj++;
2921     }
2922   }
2923 
2924   /* create our matrix */
2925   for (i=0; i<m; i++) {
2926     ourlens[i] -= offlens[i];
2927   }
2928   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2929   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2930   ierr = MatSetType(A,type);CHKERRQ(ierr);
2931   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2932 
2933   for (i=0; i<m; i++) {
2934     ourlens[i] += offlens[i];
2935   }
2936 
2937   if (!rank) {
2938     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2939 
2940     /* read in my part of the matrix numerical values  */
2941     nz   = procsnz[0];
2942     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2943 
2944     /* insert into matrix */
2945     jj      = rstart;
2946     smycols = mycols;
2947     svals   = vals;
2948     for (i=0; i<m; i++) {
2949       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2950       smycols += ourlens[i];
2951       svals   += ourlens[i];
2952       jj++;
2953     }
2954 
2955     /* read in other processors and ship out */
2956     for (i=1; i<size; i++) {
2957       nz   = procsnz[i];
2958       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2959       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2960     }
2961     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2962   } else {
2963     /* receive numeric values */
2964     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2965 
2966     /* receive message of values*/
2967     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2968     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2969     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2970 
2971     /* insert into matrix */
2972     jj      = rstart;
2973     smycols = mycols;
2974     svals   = vals;
2975     for (i=0; i<m; i++) {
2976       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2977       smycols += ourlens[i];
2978       svals   += ourlens[i];
2979       jj++;
2980     }
2981   }
2982   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2983   ierr = PetscFree(vals);CHKERRQ(ierr);
2984   ierr = PetscFree(mycols);CHKERRQ(ierr);
2985   ierr = PetscFree(rowners);CHKERRQ(ierr);
2986 
2987   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2988   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2989   *newmat = A;
2990   PetscFunctionReturn(0);
2991 }
2992 
2993 #undef __FUNCT__
2994 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2995 /*
2996     Not great since it makes two copies of the submatrix: first a local SeqAIJ matrix
2997   on each process, and then the end result obtained by concatenating the local matrices.
2998   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2999 */
3000 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3001 {
3002   PetscErrorCode ierr;
3003   PetscMPIInt    rank,size;
3004   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3005   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3006   Mat            *local,M,Mreuse;
3007   MatScalar      *vwork,*aa;
3008   MPI_Comm       comm = ((PetscObject)mat)->comm;
3009   Mat_SeqAIJ     *aij;
3010 
3011 
3012   PetscFunctionBegin;
3013   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3014   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3015 
3016   if (call ==  MAT_REUSE_MATRIX) {
3017     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3018     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3019     local = &Mreuse;
3020     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3021   } else {
3022     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3023     Mreuse = *local;
3024     ierr   = PetscFree(local);CHKERRQ(ierr);
3025   }
3026 
3027   /*
3028       m - number of local rows
3029       n - number of columns (same on all processors)
3030       rstart - first row in new global matrix generated
3031   */
3032   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3033   if (call == MAT_INITIAL_MATRIX) {
3034     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3035     ii  = aij->i;
3036     jj  = aij->j;
3037 
3038     /*
3039         Determine the number of non-zeros in the diagonal and off-diagonal
3040         portions of the matrix in order to do correct preallocation
3041     */
3042 
3043     /* first get start and end of "diagonal" columns */
3044     if (csize == PETSC_DECIDE) {
3045       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3046       if (mglobal == n) { /* square matrix */
3047 	nlocal = m;
3048       } else {
3049         nlocal = n/size + ((n % size) > rank);
3050       }
3051     } else {
3052       nlocal = csize;
3053     }
3054     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3055     rstart = rend - nlocal;
3056     if (rank == size - 1 && rend != n) {
3057       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3058     }
3059 
3060     /* next, compute all the lengths */
3061     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3062     olens = dlens + m;
3063     for (i=0; i<m; i++) {
3064       jend = ii[i+1] - ii[i];
3065       olen = 0;
3066       dlen = 0;
3067       for (j=0; j<jend; j++) {
3068         if (*jj < rstart || *jj >= rend) olen++;
3069         else dlen++;
3070         jj++;
3071       }
3072       olens[i] = olen;
3073       dlens[i] = dlen;
3074     }
3075     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3076     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3077     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3078     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3079     ierr = PetscFree(dlens);CHKERRQ(ierr);
3080   } else {
3081     PetscInt ml,nl;
3082 
3083     M = *newmat;
3084     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3085     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3086     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3087     /*
3088          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3089        rather than the slower MatSetValues().
3090     */
3091     M->was_assembled = PETSC_TRUE;
3092     M->assembled     = PETSC_FALSE;
3093   }
3094   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3095   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3096   ii  = aij->i;
3097   jj  = aij->j;
3098   aa  = aij->a;
3099   for (i=0; i<m; i++) {
3100     row   = rstart + i;
3101     nz    = ii[i+1] - ii[i];
3102     cwork = jj;     jj += nz;
3103     vwork = aa;     aa += nz;
3104     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3105   }
3106 
3107   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3108   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3109   *newmat = M;
3110 
3111   /* save submatrix used in processor for next request */
3112   if (call ==  MAT_INITIAL_MATRIX) {
3113     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3114     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3115   }
3116 
3117   PetscFunctionReturn(0);
3118 }
3119 
3120 EXTERN_C_BEGIN
3121 #undef __FUNCT__
3122 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3123 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3124 {
3125   PetscInt       m,cstart, cend,j,nnz,i,d;
3126   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3127   const PetscInt *JJ;
3128   PetscScalar    *values;
3129   PetscErrorCode ierr;
3130 
3131   PetscFunctionBegin;
3132   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3133 
3134   B->rmap->bs = B->cmap->bs = 1;
3135   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3136   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3137   m      = B->rmap->n;
3138   cstart = B->cmap->rstart;
3139   cend   = B->cmap->rend;
3140   rstart = B->rmap->rstart;
3141 
3142   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3143   o_nnz = d_nnz + m;
3144 
3145 #if defined(PETSC_USE_DEBUG)
3146   for (i=0; i<m; i++) {
3147     nnz     = Ii[i+1]- Ii[i];
3148     JJ      = J + Ii[i];
3149     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3150     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3151     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3152     for (j=1; j<nnz; j++) {
3153       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at location %D",i,j);
3154     }
3155   }
3156 #endif
3157 
3158   for (i=0; i<m; i++) {
3159     nnz     = Ii[i+1]- Ii[i];
3160     JJ      = J + Ii[i];
3161     nnz_max = PetscMax(nnz_max,nnz);
3162     for (j=0; j<nnz; j++) {
3163       if (*JJ >= cstart) break;
3164       JJ++;
3165     }
3166     d = 0;
3167     for (; j<nnz; j++) {
3168       if (*JJ++ >= cend) break;
3169       d++;
3170     }
3171     d_nnz[i] = d;
3172     o_nnz[i] = nnz - d;
3173   }
3174   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3175   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3176 
3177   if (v) values = (PetscScalar*)v;
3178   else {
3179     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3180     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3181   }
3182 
3183   for (i=0; i<m; i++) {
3184     ii   = i + rstart;
3185     nnz  = Ii[i+1]- Ii[i];
3186     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3187   }
3188   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3189   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3190 
3191   if (!v) {
3192     ierr = PetscFree(values);CHKERRQ(ierr);
3193   }
3194   PetscFunctionReturn(0);
3195 }
3196 EXTERN_C_END
3197 
3198 #undef __FUNCT__
3199 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3200 /*@
3201    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3202    (the default parallel PETSc format).
3203 
3204    Collective on MPI_Comm
3205 
3206    Input Parameters:
3207 +  B - the matrix
3208 .  i - the indices into j for the start of each local row (starts with zero)
3209 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3210 -  v - optional values in the matrix
3211 
3212    Level: developer
3213 
3214    Notes:
3215        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3216      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3217      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3218 
3219        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3220 
3221        The format used for the sparse matrix input is equivalent to a
3222     row-major ordering, i.e., for the following matrix the input data expected is
3223     as shown:
3224 
3225         1 0 0
3226         2 0 3     P0
3227        -------
3228         4 5 6     P1
3229 
3230      Process0 [P0]: rows_owned=[0,1]
3231         i =  {0,1,3}  [size = nrow+1  = 2+1]
3232         j =  {0,0,2}  [size = nz = 3]
3233         v =  {1,2,3}  [size = nz = 3]
3234 
3235      Process1 [P1]: rows_owned=[2]
3236         i =  {0,3}    [size = nrow+1  = 1+1]
3237         j =  {0,1,2}  [size = nz = 3]
3238         v =  {4,5,6}  [size = nz = 3]
3239 
3240       The column indices for each row MUST be sorted.
3241 
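       A minimal calling sketch, assuming comm, the local sizes m and n, the global sizes
     M and N, and the local CSR arrays i, j, and v have been set up as in the example above
     (the variable names are only illustrative):

.vb
        Mat B;
        ierr = MatCreate(comm,&B);CHKERRQ(ierr);
        ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
        ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
        ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
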
3242 .keywords: matrix, aij, compressed row, sparse, parallel
3243 
3244 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3245           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3246 @*/
3247 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3248 {
3249   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3250 
3251   PetscFunctionBegin;
3252   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3253   if (f) {
3254     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3255   }
3256   PetscFunctionReturn(0);
3257 }
3258 
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3261 /*@C
3262    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3263    (the default parallel PETSc format).  For good matrix assembly performance
3264    the user should preallocate the matrix storage by setting the parameters
3265    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3266    performance can be increased by more than a factor of 50.
3267 
3268    Collective on MPI_Comm
3269 
3270    Input Parameters:
3271 +  A - the matrix
3272 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3273            (same value is used for all local rows)
3274 .  d_nnz - array containing the number of nonzeros in the various rows of the
3275            DIAGONAL portion of the local submatrix (possibly different for each row)
3276            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3277            The size of this array is equal to the number of local rows, i.e 'm'.
3278            You must leave room for the diagonal entry even if it is zero.
3279 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3280            submatrix (same value is used for all local rows).
3281 -  o_nnz - array containing the number of nonzeros in the various rows of the
3282            OFF-DIAGONAL portion of the local submatrix (possibly different for
3283            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3284            structure. The size of this array is equal to the number
3285            of local rows, i.e 'm'.
3286 
3287    If the *_nnz parameter is given then the *_nz parameter is ignored
3288 
3289    The AIJ format (also called the Yale sparse matrix format or
3290    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3291    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3292 
3293    The parallel matrix is partitioned such that the first m0 rows belong to
3294    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3295    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3296 
3297    The DIAGONAL portion of the local submatrix of a processor can be defined
3298    as the submatrix obtained by extracting the part corresponding
3299    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3300    first row that belongs to the processor, and r2 is the last row belonging
3301    to this processor. This is a square m x m matrix. The remaining portion
3302    of the local submatrix, of size m x (N-m), constitutes the OFF-DIAGONAL portion.
3303 
3304    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3305 
3306    You can call MatGetInfo() to get information on how effective the preallocation was;
3307    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3308    You can also run with the option -info and look for messages with the string
3309    malloc in them to see if additional memory allocation was needed.
3310 
3311    Example usage:
3312 
3313    Consider the following 8x8 matrix with 34 non-zero values, that is
3314    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3315    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3316    as follows:
3317 
3318 .vb
3319             1  2  0  |  0  3  0  |  0  4
3320     Proc0   0  5  6  |  7  0  0  |  8  0
3321             9  0 10  | 11  0  0  | 12  0
3322     -------------------------------------
3323            13  0 14  | 15 16 17  |  0  0
3324     Proc1   0 18  0  | 19 20 21  |  0  0
3325             0  0  0  | 22 23  0  | 24  0
3326     -------------------------------------
3327     Proc2  25 26 27  |  0  0 28  | 29  0
3328            30  0  0  | 31 32 33  |  0 34
3329 .ve
3330 
3331    This can be represented as a collection of submatrices as:
3332 
3333 .vb
3334       A B C
3335       D E F
3336       G H I
3337 .ve
3338 
3339    Where the submatrices A,B,C are owned by proc0, D,E,F are
3340    owned by proc1, G,H,I are owned by proc2.
3341 
3342    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3343    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3344    The 'M','N' parameters are 8,8, and have the same values on all procs.
3345 
3346    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3347    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3348    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3349    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3350    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3351    matrix, and [DF] as another SeqAIJ matrix.
3352 
3353    When d_nz, o_nz parameters are specified, d_nz storage elements are
3354    allocated for every row of the local diagonal submatrix, and o_nz
3355    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3356    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3357    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3358    In this case, the values of d_nz,o_nz are:
3359 .vb
3360      proc0 : dnz = 2, o_nz = 2
3361      proc1 : dnz = 3, o_nz = 2
3362      proc2 : dnz = 1, o_nz = 4
3363 .ve
3364    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3365    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3366    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3367    34 values.
3368 
3369    When d_nnz, o_nnz parameters are specified, the storage is specified
3370    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3371    In the above case the values for d_nnz,o_nnz are:
3372 .vb
3373      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3374      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3375      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3376 .ve
3377    Here the space allocated is the sum of all the above values, i.e. 34, and
3378    hence pre-allocation is perfect.
3379 
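   As a sketch only (assuming comm spans the three processes above, m is 3, 3, or 2
   depending on the process, and d_nnz/o_nnz are the per-process arrays just listed),
   the matrix could be set up as:

.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,PETSC_DECIDE,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
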
3380    Level: intermediate
3381 
3382 .keywords: matrix, aij, compressed row, sparse, parallel
3383 
3384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3385           MPIAIJ, MatGetInfo()
3386 @*/
3387 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3388 {
3389   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3390 
3391   PetscFunctionBegin;
3392   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3393   if (f) {
3394     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3395   }
3396   PetscFunctionReturn(0);
3397 }
3398 
3399 #undef __FUNCT__
3400 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3401 /*@
3402      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3403          rows in standard CSR format.
3404 
3405    Collective on MPI_Comm
3406 
3407    Input Parameters:
3408 +  comm - MPI communicator
3409 .  m - number of local rows (Cannot be PETSC_DECIDE)
3410 .  n - This value should be the same as the local size used in creating the
3411        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3412        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3413        calculated if N is given). For square matrices n is almost always m.
3414 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3415 .   i - row indices
3416 .   j - column indices
3417 -   a - matrix values
3418 
3419    Output Parameter:
3420 .   mat - the matrix
3421 
3422    Level: intermediate
3423 
3424    Notes:
3425        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3426      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3427      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3428 
3429        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3430 
3431        The format used for the sparse matrix input is equivalent to a
3432     row-major ordering, i.e., for the following matrix the input data expected is
3433     as shown:
3434 
3435         1 0 0
3436         2 0 3     P0
3437        -------
3438         4 5 6     P1
3439 
3440      Process0 [P0]: rows_owned=[0,1]
3441         i =  {0,1,3}  [size = nrow+1  = 2+1]
3442         j =  {0,0,2}  [size = nz = 3]
3443         v =  {1,2,3}  [size = nz = 3]
3444 
3445      Process1 [P1]: rows_owned=[2]
3446         i =  {0,3}    [size = nrow+1  = 1+1]
3447         j =  {0,1,2}  [size = nz = 3]
3448         v =  {4,5,6}  [size = nz = 3]
3449 
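     A minimal calling sketch, assuming comm, the local sizes m and n, and the local CSR
     arrays i, j, and a have been filled in as in the example above (the variable names
     are only illustrative):

.vb
     Mat mat;
     ierr = MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
.ve
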
3450 .keywords: matrix, aij, compressed row, sparse, parallel
3451 
3452 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3453           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3454 @*/
3455 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3456 {
3457   PetscErrorCode ierr;
3458 
3459   PetscFunctionBegin;
3460   if (i[0]) {
3461     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3462   }
3463   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3464   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3465   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3466   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3467   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3468   PetscFunctionReturn(0);
3469 }
3470 
3471 #undef __FUNCT__
3472 #define __FUNCT__ "MatCreateMPIAIJ"
3473 /*@C
3474    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3475    (the default parallel PETSc format).  For good matrix assembly performance
3476    the user should preallocate the matrix storage by setting the parameters
3477    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3478    performance can be increased by more than a factor of 50.
3479 
3480    Collective on MPI_Comm
3481 
3482    Input Parameters:
3483 +  comm - MPI communicator
3484 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3485            This value should be the same as the local size used in creating the
3486            y vector for the matrix-vector product y = Ax.
3487 .  n - This value should be the same as the local size used in creating the
3488        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3489        calculated if N is given). For square matrices n is almost always m.
3490 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3491 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3492 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3493            (same value is used for all local rows)
3494 .  d_nnz - array containing the number of nonzeros in the various rows of the
3495            DIAGONAL portion of the local submatrix (possibly different for each row)
3496            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3497            The size of this array is equal to the number of local rows, i.e 'm'.
3498            You must leave room for the diagonal entry even if it is zero.
3499 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3500            submatrix (same value is used for all local rows).
3501 -  o_nnz - array containing the number of nonzeros in the various rows of the
3502            OFF-DIAGONAL portion of the local submatrix (possibly different for
3503            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3504            structure. The size of this array is equal to the number
3505            of local rows, i.e 'm'.
3506 
3507    Output Parameter:
3508 .  A - the matrix
3509 
3510    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3511    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
3512    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3513    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3514 
3515    Notes:
3516    If the *_nnz parameter is given then the *_nz parameter is ignored
3517 
3518    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3519    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3520    storage requirements for this matrix.
3521 
3522    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3523    processor than it must be used on all processors that share the object for
3524    that argument.
3525 
3526    The user MUST specify either the local or global matrix dimensions
3527    (possibly both).
3528 
3529    The parallel matrix is partitioned across processors such that the
3530    first m0 rows belong to process 0, the next m1 rows belong to
3531    process 1, the next m2 rows belong to process 2 etc.. where
3532    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3533    values corresponding to [m x N] submatrix.
3534 
3535    The columns are logically partitioned with the n0 columns belonging
3536    to 0th partition, the next n1 columns belonging to the next
3537    partition etc., where n0,n1,n2... are the input parameter 'n'.
3538 
3539    The DIAGONAL portion of the local submatrix on any given processor
3540    is the submatrix corresponding to the rows and columns m,n
3541    corresponding to the given processor. i.e diagonal matrix on
3542    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3543    etc. The remaining portion of the local submatrix [m x (N-n)]
3544    constitute the OFF-DIAGONAL portion. The example below better
3545    illustrates this concept.
3546 
3547    For a square global matrix we define each processor's diagonal portion
3548    to be its local rows and the corresponding columns (a square submatrix);
3549    each processor's off-diagonal portion encompasses the remainder of the
3550    local matrix (a rectangular submatrix).
3551 
3552    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3553 
3554    When calling this routine with a single process communicator, a matrix of
3555    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3556    type of communicator, use the construction mechanism:
3557      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3558 
3559    By default, this format uses inodes (identical nodes) when possible.
3560    We search for consecutive rows with the same nonzero structure, thereby
3561    reusing matrix information to achieve increased efficiency.
3562 
3563    Options Database Keys:
3564 +  -mat_no_inode  - Do not use inodes
3565 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3566 -  -mat_aij_oneindex - Internally use indexing starting at 1
3567         rather than 0.  Note that when calling MatSetValues(),
3568         the user still MUST index entries starting at 0!
3569 
3570 
3571    Example usage:
3572 
3573    Consider the following 8x8 matrix with 34 non-zero values, that is
3574    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3575    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3576    as follows:
3577 
3578 .vb
3579             1  2  0  |  0  3  0  |  0  4
3580     Proc0   0  5  6  |  7  0  0  |  8  0
3581             9  0 10  | 11  0  0  | 12  0
3582     -------------------------------------
3583            13  0 14  | 15 16 17  |  0  0
3584     Proc1   0 18  0  | 19 20 21  |  0  0
3585             0  0  0  | 22 23  0  | 24  0
3586     -------------------------------------
3587     Proc2  25 26 27  |  0  0 28  | 29  0
3588            30  0  0  | 31 32 33  |  0 34
3589 .ve
3590 
3591    This can be represented as a collection of submatrices as:
3592 
3593 .vb
3594       A B C
3595       D E F
3596       G H I
3597 .ve
3598 
3599    Where the submatrices A,B,C are owned by proc0, D,E,F are
3600    owned by proc1, G,H,I are owned by proc2.
3601 
3602    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3603    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3604    The 'M','N' parameters are 8,8, and have the same values on all procs.
3605 
3606    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3607    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3608    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3609    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3610    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3611    matrix, and [DF] as another SeqAIJ matrix.
3612 
3613    When d_nz, o_nz parameters are specified, d_nz storage elements are
3614    allocated for every row of the local diagonal submatrix, and o_nz
3615    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3616    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3617    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3618    In this case, the values of d_nz,o_nz are:
3619 .vb
3620      proc0 : dnz = 2, o_nz = 2
3621      proc1 : dnz = 3, o_nz = 2
3622      proc2 : dnz = 1, o_nz = 4
3623 .ve
3624    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3625    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3626    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3627    34 values.
3628 
3629    When d_nnz, o_nnz parameters are specified, the storage is specified
3630    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3631    In the above case the values for d_nnz,o_nnz are:
3632 .vb
3633      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3634      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3635      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3636 .ve
3637    Here the space allocated is the sum of all the above values, i.e. 34, and
3638    hence pre-allocation is perfect.
3639 
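   A sketch of the corresponding call for the example above (assuming comm spans the three
   processes, m and n are 3, 3, or 2 depending on the process, and d_nnz/o_nnz are the
   per-process arrays just listed):

.vb
     Mat A;
     ierr = MatCreateMPIAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
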
3640    Level: intermediate
3641 
3642 .keywords: matrix, aij, compressed row, sparse, parallel
3643 
3644 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3645           MPIAIJ, MatCreateMPIAIJWithArrays()
3646 @*/
3647 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3648 {
3649   PetscErrorCode ierr;
3650   PetscMPIInt    size;
3651 
3652   PetscFunctionBegin;
3653   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3654   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3655   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3656   if (size > 1) {
3657     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3658     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3659   } else {
3660     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3661     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3662   }
3663   PetscFunctionReturn(0);
3664 }
3665 
3666 #undef __FUNCT__
3667 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
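/*
    Gives access to the internal data of an MPIAIJ matrix: Ad is the local diagonal block,
  Ao is the local off-diagonal block (both stored as SeqAIJ matrices), and colmap maps the
  (compressed) local column indices of Ao to global column indices.
*/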
3668 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3669 {
3670   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3671 
3672   PetscFunctionBegin;
3673   *Ad     = a->A;
3674   *Ao     = a->B;
3675   *colmap = a->garray;
3676   PetscFunctionReturn(0);
3677 }
3678 
3679 #undef __FUNCT__
3680 #define __FUNCT__ "MatSetColoring_MPIAIJ"
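/*
    Sets the coloring on both the diagonal (a->A) and off-diagonal (a->B) SeqAIJ blocks.
  For IS_COLORING_GLOBAL the colors are first gathered onto every process so that the
  off-diagonal (ghost) columns can be colored; for IS_COLORING_GHOSTED the global indices
  are mapped to local ones with A->mapping before looking up the colors.
*/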
3681 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3682 {
3683   PetscErrorCode ierr;
3684   PetscInt       i;
3685   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3686 
3687   PetscFunctionBegin;
3688   if (coloring->ctype == IS_COLORING_GLOBAL) {
3689     ISColoringValue *allcolors,*colors;
3690     ISColoring      ocoloring;
3691 
3692     /* set coloring for diagonal portion */
3693     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3694 
3695     /* set coloring for off-diagonal portion */
3696     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3697     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3698     for (i=0; i<a->B->cmap->n; i++) {
3699       colors[i] = allcolors[a->garray[i]];
3700     }
3701     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3702     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3703     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3704     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3705   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3706     ISColoringValue *colors;
3707     PetscInt        *larray;
3708     ISColoring      ocoloring;
3709 
3710     /* set coloring for diagonal portion */
3711     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3712     for (i=0; i<a->A->cmap->n; i++) {
3713       larray[i] = i + A->cmap->rstart;
3714     }
3715     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3716     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3717     for (i=0; i<a->A->cmap->n; i++) {
3718       colors[i] = coloring->colors[larray[i]];
3719     }
3720     ierr = PetscFree(larray);CHKERRQ(ierr);
3721     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3722     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3723     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3724 
3725     /* set coloring for off-diagonal portion */
3726     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3727     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3728     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3729     for (i=0; i<a->B->cmap->n; i++) {
3730       colors[i] = coloring->colors[larray[i]];
3731     }
3732     ierr = PetscFree(larray);CHKERRQ(ierr);
3733     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3734     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3735     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3736   } else {
3737     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3738   }
3739 
3740   PetscFunctionReturn(0);
3741 }
3742 
3743 #if defined(PETSC_HAVE_ADIC)
3744 #undef __FUNCT__
3745 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3746 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3747 {
3748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3749   PetscErrorCode ierr;
3750 
3751   PetscFunctionBegin;
3752   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3753   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3754   PetscFunctionReturn(0);
3755 }
3756 #endif
3757 
3758 #undef __FUNCT__
3759 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3760 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3761 {
3762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3763   PetscErrorCode ierr;
3764 
3765   PetscFunctionBegin;
3766   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3767   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3768   PetscFunctionReturn(0);
3769 }
3770 
3771 #undef __FUNCT__
3772 #define __FUNCT__ "MatMerge"
3773 /*@
3774       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3775                  matrices from each processor
3776 
3777     Collective on MPI_Comm
3778 
3779    Input Parameters:
3780 +    comm - the communicator the parallel matrix will live on
3781 .    inmat - the input sequential matrix (one per process)
3782 .    n - number of local columns (or PETSC_DECIDE)
3783 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3784 
3785    Output Parameter:
3786 .    outmat - the parallel matrix generated
3787 
3788     Level: advanced
3789 
3790    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3791 
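   A minimal usage sketch (assuming each process has already assembled its sequential
   matrix inmat, with the same number of columns on every process):

.vb
     Mat outmat;
     ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve

   Note that inmat is destroyed by this routine, so the caller must not destroy it again.
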
3792 @*/
3793 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3794 {
3795   PetscErrorCode ierr;
3796   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3797   PetscInt       *indx;
3798   PetscScalar    *values;
3799 
3800   PetscFunctionBegin;
3801   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3802   if (scall == MAT_INITIAL_MATRIX){
3803     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3804     if (n == PETSC_DECIDE){
3805       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3806     }
3807     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3808     rstart -= m;
3809 
3810     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3811     for (i=0;i<m;i++) {
3812       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3813       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3814       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3815     }
3816     /* This routine will ONLY return MPIAIJ type matrix */
3817     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3818     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3819     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3820     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3821     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3822 
3823   } else if (scall == MAT_REUSE_MATRIX){
3824     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3825   } else {
3826     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3827   }
3828 
3829   for (i=0;i<m;i++) {
3830     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3831     Ii    = i + rstart;
3832     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3833     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3834   }
3835   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3836   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3837   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3838 
3839   PetscFunctionReturn(0);
3840 }
3841 
3842 #undef __FUNCT__
3843 #define __FUNCT__ "MatFileSplit"
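/*
    Writes each process's local rows of the parallel matrix A as a sequential matrix,
  appended to its own binary file <outfile>.<rank>.
*/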
3844 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3845 {
3846   PetscErrorCode    ierr;
3847   PetscMPIInt       rank;
3848   PetscInt          m,N,i,rstart,nnz;
3849   size_t            len;
3850   const PetscInt    *indx;
3851   PetscViewer       out;
3852   char              *name;
3853   Mat               B;
3854   const PetscScalar *values;
3855 
3856   PetscFunctionBegin;
3857   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3858   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3859   /* Should this be the type of the diagonal block of A? */
3860   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3861   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3862   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3863   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3864   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3865   for (i=0;i<m;i++) {
3866     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3867     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3868     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3869   }
3870   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3871   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3872 
3873   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3874   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3875   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3876   sprintf(name,"%s.%d",outfile,rank);
3877   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3878   ierr = PetscFree(name);CHKERRQ(ierr);
3879   ierr = MatView(B,out);CHKERRQ(ierr);
3880   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3881   ierr = MatDestroy(B);CHKERRQ(ierr);
3882   PetscFunctionReturn(0);
3883 }
3884 
3885 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3886 #undef __FUNCT__
3887 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3888 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3889 {
3890   PetscErrorCode       ierr;
3891   Mat_Merge_SeqsToMPI  *merge;
3892   PetscContainer       container;
3893 
3894   PetscFunctionBegin;
3895   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3896   if (container) {
3897     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3898     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3899     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3900     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3901     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3902     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3903     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3904     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3905     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3906     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3907     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3908     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3909 
3910     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3911     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3912     ierr = PetscFree(merge);CHKERRQ(ierr);
3913   }
3914 
3915   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3916   PetscFunctionReturn(0);
3917 }
3918 
3919 #include "src/mat/utils/freespace.h"
3920 #include "petscbt.h"
3921 
3922 #undef __FUNCT__
3923 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3924 /*@C
3925       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3926                  matrices from each processor
3927 
3928     Collective on MPI_Comm
3929 
3930    Input Parameters:
3931 +    comm - the communicator the parallel matrix will live on
3932 .    seqmat - the input sequential matrix (one per process)
3933 .    m - number of local rows (or PETSC_DECIDE)
3934 .    n - number of local columns (or PETSC_DECIDE)
3935 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3936 
3937    Output Parameter:
3938 .    mpimat - the parallel matrix generated
3939 
3940     Level: advanced
3941 
3942    Notes:
3943      The dimensions of the sequential matrix in each processor MUST be the same.
3944      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3945      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
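
     A sketch of the intended two-phase usage, first symbolic then numeric (assuming each
     process has assembled its sequential matrix seqmat):

.vb
     Mat mpimat;
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve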
3946 @*/
3947 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3948 {
3949   PetscErrorCode       ierr;
3950   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3951   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3952   PetscMPIInt          size,rank,taga,*len_s;
3953   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3954   PetscInt             proc,m;
3955   PetscInt             **buf_ri,**buf_rj;
3956   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3957   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3958   MPI_Request          *s_waits,*r_waits;
3959   MPI_Status           *status;
3960   MatScalar            *aa=a->a;
3961   MatScalar            **abuf_r,*ba_i;
3962   Mat_Merge_SeqsToMPI  *merge;
3963   PetscContainer       container;
3964 
3965   PetscFunctionBegin;
3966   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3967 
3968   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3969   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3970 
3971   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3972   if (container) {
3973     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3974   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing the MatMergeSeqsToMPI context; call MatMerge_SeqsToMPISymbolic() first");
3975   bi     = merge->bi;
3976   bj     = merge->bj;
3977   buf_ri = merge->buf_ri;
3978   buf_rj = merge->buf_rj;
3979 
3980   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3981   owners = merge->rowmap.range;
3982   len_s  = merge->len_s;
3983 
3984   /* send and recv matrix values */
3985   /*-----------------------------*/
3986   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3987   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3988 
3989   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3990   for (proc=0,k=0; proc<size; proc++){
3991     if (!len_s[proc]) continue;
3992     i = owners[proc];
3993     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3994     k++;
3995   }
3996 
3997   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3998   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3999   ierr = PetscFree(status);CHKERRQ(ierr);
4000 
4001   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4002   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4003 
4004   /* insert mat values of mpimat */
4005   /*----------------------------*/
4006   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4007   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4008   nextrow = buf_ri_k + merge->nrecv;
4009   nextai  = nextrow + merge->nrecv;
4010 
4011   for (k=0; k<merge->nrecv; k++){
4012     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4013     nrows = *(buf_ri_k[k]);
4014     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4015     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4016   }
4017 
4018   /* set values of ba */
4019   m = merge->rowmap.n;
4020   for (i=0; i<m; i++) {
4021     arow = owners[rank] + i;
4022     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4023     bnzi = bi[i+1] - bi[i];
4024     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4025 
4026     /* add local non-zero vals of this proc's seqmat into ba */
4027     anzi = ai[arow+1] - ai[arow];
4028     aj   = a->j + ai[arow];
4029     aa   = a->a + ai[arow];
4030     nextaj = 0;
4031     for (j=0; nextaj<anzi; j++){
4032       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4033         ba_i[j] += aa[nextaj++];
4034       }
4035     }
4036 
4037     /* add received vals into ba */
4038     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4039       /* i-th row */
4040       if (i == *nextrow[k]) {
4041         anzi = *(nextai[k]+1) - *nextai[k];
4042         aj   = buf_rj[k] + *(nextai[k]);
4043         aa   = abuf_r[k] + *(nextai[k]);
4044         nextaj = 0;
4045         for (j=0; nextaj<anzi; j++){
4046           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4047             ba_i[j] += aa[nextaj++];
4048           }
4049         }
4050         nextrow[k]++; nextai[k]++;
4051       }
4052     }
4053     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4054   }
4055   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4056   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4057 
4058   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4059   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4060   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4061   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4062   PetscFunctionReturn(0);
4063 }
4064 
4065 #undef __FUNCT__
4066 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4067 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4068 {
4069   PetscErrorCode       ierr;
4070   Mat                  B_mpi;
4071   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4072   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4073   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4074   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4075   PetscInt             len,proc,*dnz,*onz;
4076   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4077   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4078   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4079   MPI_Status           *status;
4080   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4081   PetscBT              lnkbt;
4082   Mat_Merge_SeqsToMPI  *merge;
4083   PetscContainer       container;
4084 
4085   PetscFunctionBegin;
4086   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4087 
4088   /* make sure it is a PETSc comm */
4089   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4090   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4091   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4092 
4093   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4094   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4095 
4096   /* determine row ownership */
4097   /*---------------------------------------------------------*/
4098   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4099   merge->rowmap.n = m;
4100   merge->rowmap.N = M;
4101   merge->rowmap.bs = 1;
4102   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4103   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4104   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4105 
4106   m      = merge->rowmap.n;
4107   M      = merge->rowmap.N;
4108   owners = merge->rowmap.range;
4109 
4110   /* determine the number of messages to send, their lengths */
4111   /*---------------------------------------------------------*/
4112   len_s  = merge->len_s;
4113 
4114   len = 0;  /* length of buf_si[] */
4115   merge->nsend = 0;
4116   for (proc=0; proc<size; proc++){
4117     len_si[proc] = 0;
4118     if (proc == rank){
4119       len_s[proc] = 0;
4120     } else {
4121       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4122       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4123     }
4124     if (len_s[proc]) {
4125       merge->nsend++;
4126       nrows = 0;
4127       for (i=owners[proc]; i<owners[proc+1]; i++){
4128         if (ai[i+1] > ai[i]) nrows++;
4129       }
4130       len_si[proc] = 2*(nrows+1);
4131       len += len_si[proc];
4132     }
4133   }
4134 
4135   /* determine the number and length of messages to receive for ij-structure */
4136   /*-------------------------------------------------------------------------*/
4137   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4138   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4139 
4140   /* post the Irecv of j-structure */
4141   /*-------------------------------*/
4142   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4143   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4144 
4145   /* post the Isend of j-structure */
4146   /*--------------------------------*/
4147   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4148   sj_waits = si_waits + merge->nsend;
4149 
4150   for (proc=0, k=0; proc<size; proc++){
4151     if (!len_s[proc]) continue;
4152     i = owners[proc];
4153     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4154     k++;
4155   }
4156 
4157   /* receives and sends of j-structure are complete */
4158   /*------------------------------------------------*/
4159   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4160   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4161 
4162   /* send and recv i-structure */
4163   /*---------------------------*/
4164   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4165   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4166 
4167   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4168   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4169   for (proc=0,k=0; proc<size; proc++){
4170     if (!len_s[proc]) continue;
4171     /* form outgoing message for i-structure:
4172          buf_si[0]:                 nrows to be sent
4173                [1:nrows]:           row index (local to the receiving process)
4174                [nrows+1:2*nrows+1]: i-structure index
4175     */
4176     /*-------------------------------------------*/
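    /* Illustrative example (not from the original source): if two nonempty rows with
       receiver-local indices 3 and 7 carry 2 and 5 nonzeros respectively, then
       buf_si = {2, 3,7, 0,2,7}: nrows, the row indices, then the running i-structure offsets */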
4177     nrows = len_si[proc]/2 - 1;
4178     buf_si_i    = buf_si + nrows+1;
4179     buf_si[0]   = nrows;
4180     buf_si_i[0] = 0;
4181     nrows = 0;
4182     for (i=owners[proc]; i<owners[proc+1]; i++){
4183       anzi = ai[i+1] - ai[i];
4184       if (anzi) {
4185         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4186         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4187         nrows++;
4188       }
4189     }
4190     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4191     k++;
4192     buf_si += len_si[proc];
4193   }
4194 
4195   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4196   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4197 
4198   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4199   for (i=0; i<merge->nrecv; i++){
4200     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4201   }
4202 
4203   ierr = PetscFree(len_si);CHKERRQ(ierr);
4204   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4205   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4206   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4207   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4208   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4209   ierr = PetscFree(status);CHKERRQ(ierr);
4210 
4211   /* compute a local seq matrix in each processor */
4212   /*----------------------------------------------*/
4213   /* allocate bi array and free space for accumulating nonzero column info */
4214   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4215   bi[0] = 0;
4216 
4217   /* create and initialize a linked list */
4218   nlnk = N+1;
4219   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4220 
4221   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4222   len = 0;
4223   len  = ai[owners[rank+1]] - ai[owners[rank]];
4224   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4225   current_space = free_space;
4226 
4227   /* determine symbolic info for each local row */
4228   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4229   nextrow = buf_ri_k + merge->nrecv;
4230   nextai  = nextrow + merge->nrecv;
4231   for (k=0; k<merge->nrecv; k++){
4232     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4233     nrows = *buf_ri_k[k];
4234     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4235     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4236   }
4237 
4238   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4239   len = 0;
4240   for (i=0;i<m;i++) {
4241     bnzi   = 0;
4242     /* add local non-zero cols of this proc's seqmat into lnk */
4243     arow   = owners[rank] + i;
4244     anzi   = ai[arow+1] - ai[arow];
4245     aj     = a->j + ai[arow];
4246     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4247     bnzi += nlnk;
4248     /* add received col data into lnk */
4249     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4250       if (i == *nextrow[k]) { /* i-th row */
4251         anzi = *(nextai[k]+1) - *nextai[k];
4252         aj   = buf_rj[k] + *nextai[k];
4253         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4254         bnzi += nlnk;
4255         nextrow[k]++; nextai[k]++;
4256       }
4257     }
4258     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4259 
4260     /* if free space is not available, make more free space */
4261     if (current_space->local_remaining<bnzi) {
4262       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4263       nspacedouble++;
4264     }
4265     /* copy data into free space, then initialize lnk */
4266     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4267     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4268 
4269     current_space->array           += bnzi;
4270     current_space->local_used      += bnzi;
4271     current_space->local_remaining -= bnzi;
4272 
4273     bi[i+1] = bi[i] + bnzi;
4274   }
4275 
4276   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4277 
4278   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4279   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4280   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4281 
4282   /* create symbolic parallel matrix B_mpi */
4283   /*---------------------------------------*/
4284   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4285   if (n==PETSC_DECIDE) {
4286     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4287   } else {
4288     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4289   }
4290   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4291   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4292   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4293 
4294   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4295   B_mpi->assembled     = PETSC_FALSE;
4296   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4297   merge->bi            = bi;
4298   merge->bj            = bj;
4299   merge->buf_ri        = buf_ri;
4300   merge->buf_rj        = buf_rj;
4301   merge->coi           = PETSC_NULL;
4302   merge->coj           = PETSC_NULL;
4303   merge->owners_co     = PETSC_NULL;
4304 
4305   /* attach the supporting struct to B_mpi for reuse */
4306   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4307   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4308   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4309   *mpimat = B_mpi;
4310 
4311   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4312   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4313   PetscFunctionReturn(0);
4314 }
4315 
4316 #undef __FUNCT__
4317 #define __FUNCT__ "MatMerge_SeqsToMPI"
4318 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4319 {
4320   PetscErrorCode   ierr;
4321 
4322   PetscFunctionBegin;
4323   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4324   if (scall == MAT_INITIAL_MATRIX){
4325     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4326   }
4327   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4328   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4329   PetscFunctionReturn(0);
4330 }
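
/*
   Usage sketch (illustrative, not from the original source; seqC, mlocal, and nlocal are
   placeholders): MatMerge_SeqsToMPI() is typically called once with MAT_INITIAL_MATRIX to
   build the parallel matrix, then with MAT_REUSE_MATRIX when only the numerical values of
   the per-process sequential matrices have changed.

     Mat seqC,C;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqC,mlocal,nlocal,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... update the values of seqC, keeping the same nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqC,mlocal,nlocal,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/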
4331 
4332 #undef __FUNCT__
4333 #define __FUNCT__ "MatGetLocalMat"
4334 /*@
4335      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel MPIAIJ matrix
4336 
4337     Not Collective
4338 
4339    Input Parameters:
4340 +    A - the matrix
4341 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4342 
4343    Output Parameter:
4344 .    A_loc - the local sequential matrix generated
4345 
4346     Level: developer
4347 
4348 @*/
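/*
   Example (a sketch, not from the original source): extract the local rows once, then refresh
   the values in place after the parallel matrix has been modified with the same nonzero pattern.

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... A's numerical values change ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/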
4349 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4350 {
4351   PetscErrorCode  ierr;
4352   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4353   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4354   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4355   MatScalar       *aa=a->a,*ba=b->a,*cam;
4356   PetscScalar     *ca;
4357   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4358   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4359 
4360   PetscFunctionBegin;
4361   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4362   if (scall == MAT_INITIAL_MATRIX){
4363     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4364     ci[0] = 0;
4365     for (i=0; i<am; i++){
4366       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4367     }
4368     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4369     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4370     k = 0;
4371     for (i=0; i<am; i++) {
4372       ncols_o = bi[i+1] - bi[i];
4373       ncols_d = ai[i+1] - ai[i];
4374       /* off-diagonal portion of A */
4375       for (jo=0; jo<ncols_o; jo++) {
4376         col = cmap[*bj];
4377         if (col >= cstart) break;
4378         cj[k]   = col; bj++;
4379         ca[k++] = *ba++;
4380       }
4381       /* diagonal portion of A */
4382       for (j=0; j<ncols_d; j++) {
4383         cj[k]   = cstart + *aj++;
4384         ca[k++] = *aa++;
4385       }
4386       /* off-diagonal portion of A */
4387       for (j=jo; j<ncols_o; j++) {
4388         cj[k]   = cmap[*bj++];
4389         ca[k++] = *ba++;
4390       }
4391     }
4392     /* put together the new matrix */
4393     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4394     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4395     /* Since these are PETSc arrays, change flags to free them as necessary. */
4396     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4397     mat->free_a  = PETSC_TRUE;
4398     mat->free_ij = PETSC_TRUE;
4399     mat->nonew   = 0;
4400   } else if (scall == MAT_REUSE_MATRIX){
4401     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4402     ci = mat->i; cj = mat->j; cam = mat->a;
4403     for (i=0; i<am; i++) {
4404       /* off-diagonal portion of A */
4405       ncols_o = bi[i+1] - bi[i];
4406       for (jo=0; jo<ncols_o; jo++) {
4407         col = cmap[*bj];
4408         if (col >= cstart) break;
4409         *cam++ = *ba++; bj++;
4410       }
4411       /* diagonal portion of A */
4412       ncols_d = ai[i+1] - ai[i];
4413       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4414       /* off-diagonal portion of A */
4415       for (j=jo; j<ncols_o; j++) {
4416         *cam++ = *ba++; bj++;
4417       }
4418     }
4419   } else {
4420     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4421   }
4422 
4423   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 #undef __FUNCT__
4428 #define __FUNCT__ "MatGetLocalMatCondensed"
4429 /*@C
4430      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and only the NON-ZERO columns of a parallel MPIAIJ matrix
4431 
4432     Not Collective
4433 
4434    Input Parameters:
4435 +    A - the matrix
4436 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4437 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4438 
4439    Output Parameter:
4440 .    A_loc - the local sequential matrix generated
4441 
4442     Level: developer
4443 
4444 @*/
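/*
   Example (a sketch, not from the original source): passing PETSC_NULL for row and col lets the
   routine build the index sets itself from this process's rows and their nonzero columns.

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ... use the condensed SeqAIJ matrix A_loc ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/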
4445 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4446 {
4447   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4448   PetscErrorCode    ierr;
4449   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4450   IS                isrowa,iscola;
4451   Mat               *aloc;
4452 
4453   PetscFunctionBegin;
4454   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4455   if (!row){
4456     start = A->rmap->rstart; end = A->rmap->rend;
4457     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4458   } else {
4459     isrowa = *row;
4460   }
4461   if (!col){
4462     start = A->cmap->rstart;
4463     cmap  = a->garray;
4464     nzA   = a->A->cmap->n;
4465     nzB   = a->B->cmap->n;
4466     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4467     ncols = 0;
4468     for (i=0; i<nzB; i++) {
4469       if (cmap[i] < start) idx[ncols++] = cmap[i];
4470       else break;
4471     }
4472     imark = i;
4473     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4474     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4475     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4476     ierr = PetscFree(idx);CHKERRQ(ierr);
4477   } else {
4478     iscola = *col;
4479   }
4480   if (scall != MAT_INITIAL_MATRIX){
4481     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4482     aloc[0] = *A_loc;
4483   }
4484   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4485   *A_loc = aloc[0];
4486   ierr = PetscFree(aloc);CHKERRQ(ierr);
4487   if (!row){
4488     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4489   }
4490   if (!col){
4491     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4492   }
4493   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 #undef __FUNCT__
4498 #define __FUNCT__ "MatGetBrowsOfAcols"
4499 /*@C
4500     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4501 
4502     Collective on Mat
4503 
4504    Input Parameters:
4505 +    A,B - the matrices in mpiaij format
4506 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4507 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4508 
4509    Output Parameters:
4510 +    rowb, colb - index sets of rows and columns of B to extract
4511 .    brstart - row index in B_seq at which the B->rmap->n locally owned rows of B begin
4512 -    B_seq - the sequential matrix generated
4513 
4514     Level: developer
4515 
4516 @*/
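/*
   Example (a sketch, not from the original source): the first call creates rowb, colb, and B_seq;
   the MAT_REUSE_MATRIX call must pass the same index sets and matrix back in.

     IS       rowb,colb;
     Mat      B_seq;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... B's numerical values change ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/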
4517 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4518 {
4519   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4520   PetscErrorCode    ierr;
4521   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4522   IS                isrowb,iscolb;
4523   Mat               *bseq;
4524 
4525   PetscFunctionBegin;
4526   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4527     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4528   }
4529   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4530 
4531   if (scall == MAT_INITIAL_MATRIX){
4532     start = A->cmap->rstart;
4533     cmap  = a->garray;
4534     nzA   = a->A->cmap->n;
4535     nzB   = a->B->cmap->n;
4536     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4537     ncols = 0;
4538     for (i=0; i<nzB; i++) {  /* row < local row index */
4539       if (cmap[i] < start) idx[ncols++] = cmap[i];
4540       else break;
4541     }
4542     imark = i;
4543     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4544     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4545     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4546     ierr = PetscFree(idx);CHKERRQ(ierr);
4547     *brstart = imark;
4548     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4549   } else {
4550     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4551     isrowb = *rowb; iscolb = *colb;
4552     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4553     bseq[0] = *B_seq;
4554   }
4555   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4556   *B_seq = bseq[0];
4557   ierr = PetscFree(bseq);CHKERRQ(ierr);
4558   if (!rowb){
4559     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4560   } else {
4561     *rowb = isrowb;
4562   }
4563   if (!colb){
4564     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4565   } else {
4566     *colb = iscolb;
4567   }
4568   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4569   PetscFunctionReturn(0);
4570 }
4571 
4572 #undef __FUNCT__
4573 #define __FUNCT__ "MatGetBrowsOfAoCols"
4574 /*@C
4575     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4576     of the OFF-DIAGONAL portion of the local part of A
4577 
4578     Collective on Mat
4579 
4580    Input Parameters:
4581 +    A,B - the matrices in mpiaij format
4582 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4583 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4584 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4585 
4586    Output Parameter:
4587 .    B_oth - the sequential matrix generated
4588 
4589     Level: developer
4590 
4591 @*/
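/*
   Example (a sketch, not from the original source): the first call allocates startsj and bufa,
   which the MAT_REUSE_MATRIX call uses to re-communicate only the numerical values.

     Mat       B_oth;
     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... B's numerical values change ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/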
4592 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4593 {
4594   VecScatter_MPI_General *gen_to,*gen_from;
4595   PetscErrorCode         ierr;
4596   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4597   Mat_SeqAIJ             *b_oth;
4598   VecScatter             ctx=a->Mvctx;
4599   MPI_Comm               comm=((PetscObject)ctx)->comm;
4600   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4601   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4602   PetscScalar            *rvalues,*svalues;
4603   MatScalar              *b_otha,*bufa,*bufA;
4604   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4605   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4606   MPI_Status             *sstatus,rstatus;
4607   PetscMPIInt            jj;
4608   PetscInt               *cols,sbs,rbs;
4609   PetscScalar            *vals;
4610 
4611   PetscFunctionBegin;
4612   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4613     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4614   }
4615   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4616   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4617 
4618   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4619   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4620   rvalues  = gen_from->values; /* holds the length of receiving row */
4621   svalues  = gen_to->values;   /* holds the length of sending row */
4622   nrecvs   = gen_from->n;
4623   nsends   = gen_to->n;
4624 
4625   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4626   srow     = gen_to->indices;   /* local row index to be sent */
4627   sstarts  = gen_to->starts;
4628   sprocs   = gen_to->procs;
4629   sstatus  = gen_to->sstatus;
4630   sbs      = gen_to->bs;
4631   rstarts  = gen_from->starts;
4632   rprocs   = gen_from->procs;
4633   rbs      = gen_from->bs;
4634 
4635   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4636   if (scall == MAT_INITIAL_MATRIX){
4637     /* i-array */
4638     /*---------*/
4639     /*  post receives */
4640     for (i=0; i<nrecvs; i++){
4641       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4642       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4643       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4644     }
4645 
4646     /* pack the outgoing message */
4647     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4648     rstartsj = sstartsj + nsends +1;
4649     sstartsj[0] = 0;  rstartsj[0] = 0;
4650     len = 0; /* total length of j or a array to be sent */
4651     k = 0;
4652     for (i=0; i<nsends; i++){
4653       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4654       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4655       for (j=0; j<nrows; j++) {
4656         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4657         for (l=0; l<sbs; l++){
4658           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4659           rowlen[j*sbs+l] = ncols;
4660           len += ncols;
4661           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4662         }
4663         k++;
4664       }
4665       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4666       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4667     }
4668     /* recvs and sends of i-array are completed */
4669     i = nrecvs;
4670     while (i--) {
4671       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4672     }
4673     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4674 
4675     /* allocate buffers for sending j and a arrays */
4676     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4677     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4678 
4679     /* create i-array of B_oth */
4680     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4681     b_othi[0] = 0;
4682     len = 0; /* total length of j or a array to be received */
4683     k = 0;
4684     for (i=0; i<nrecvs; i++){
4685       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4686       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4687       for (j=0; j<nrows; j++) {
4688         b_othi[k+1] = b_othi[k] + rowlen[j];
4689         len += rowlen[j]; k++;
4690       }
4691       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4692     }
4693 
4694     /* allocate space for j and a arrays of B_oth */
4695     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4696     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4697 
4698     /* j-array */
4699     /*---------*/
4700     /*  post receives of j-array */
4701     for (i=0; i<nrecvs; i++){
4702       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4703       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4704     }
4705 
4706     /* pack the outgoing message j-array */
4707     k = 0;
4708     for (i=0; i<nsends; i++){
4709       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4710       bufJ = bufj+sstartsj[i];
4711       for (j=0; j<nrows; j++) {
4712         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4713         for (ll=0; ll<sbs; ll++){
4714           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4715           for (l=0; l<ncols; l++){
4716             *bufJ++ = cols[l];
4717           }
4718           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4719         }
4720       }
4721       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4722     }
4723 
4724     /* recvs and sends of j-array are completed */
4725     i = nrecvs;
4726     while (i--) {
4727       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4728     }
4729     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4730   } else if (scall == MAT_REUSE_MATRIX){
4731     sstartsj = *startsj;
4732     rstartsj = sstartsj + nsends +1;
4733     bufa     = *bufa_ptr;
4734     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4735     b_otha   = b_oth->a;
4736   } else {
4737     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4738   }
4739 
4740   /* a-array */
4741   /*---------*/
4742   /*  post receives of a-array */
4743   for (i=0; i<nrecvs; i++){
4744     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4745     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4746   }
4747 
4748   /* pack the outgoing message a-array */
4749   k = 0;
4750   for (i=0; i<nsends; i++){
4751     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4752     bufA = bufa+sstartsj[i];
4753     for (j=0; j<nrows; j++) {
4754       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4755       for (ll=0; ll<sbs; ll++){
4756         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4757         for (l=0; l<ncols; l++){
4758           *bufA++ = vals[l];
4759         }
4760         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4761       }
4762     }
4763     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4764   }
4765   /* recvs and sends of a-array are completed */
4766   i = nrecvs;
4767   while (i--) {
4768     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4769   }
4770   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4771   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4772 
4773   if (scall == MAT_INITIAL_MATRIX){
4774     /* put together the new matrix */
4775     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4776 
4777     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4778     /* Since these are PETSc arrays, change flags to free them as necessary. */
4779     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4780     b_oth->free_a  = PETSC_TRUE;
4781     b_oth->free_ij = PETSC_TRUE;
4782     b_oth->nonew   = 0;
4783 
4784     ierr = PetscFree(bufj);CHKERRQ(ierr);
4785     if (!startsj || !bufa_ptr){
4786       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4787       ierr = PetscFree(bufa);CHKERRQ(ierr);
4788     } else {
4789       *startsj  = sstartsj;
4790       *bufa_ptr = bufa;
4791     }
4792   }
4793   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4794   PetscFunctionReturn(0);
4795 }
4796 
4797 #undef __FUNCT__
4798 #define __FUNCT__ "MatGetCommunicationStructs"
4799 /*@C
4800   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4801 
4802   Not Collective
4803 
4804   Input Parameter:
4805 . A - The matrix in mpiaij format
4806 
4807   Output Parameters:
4808 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4809 . colmap - A map from global column index to local index into lvec
4810 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4811 
4812   Level: developer
4813 
4814 @*/
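/*
   Example (a sketch, not from the original source, assuming PETSC_USE_CTABLE is not defined so
   colmap is returned as a PetscInt array):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/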
4815 #if defined (PETSC_USE_CTABLE)
4816 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4817 #else
4818 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4819 #endif
4820 {
4821   Mat_MPIAIJ *a;
4822 
4823   PetscFunctionBegin;
4824   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4825   PetscValidPointer(lvec, 2);
4826   PetscValidPointer(colmap, 3);
4827   PetscValidPointer(multScatter, 4);
4828   a = (Mat_MPIAIJ *) A->data;
4829   if (lvec) *lvec = a->lvec;
4830   if (colmap) *colmap = a->colmap;
4831   if (multScatter) *multScatter = a->Mvctx;
4832   PetscFunctionReturn(0);
4833 }
4834 
4835 EXTERN_C_BEGIN
4836 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4837 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4838 EXTERN_C_END
4839 
4840 #include "src/mat/impls/dense/mpi/mpidense.h"
4841 
4842 #undef __FUNCT__
4843 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4844 /*
4845     Computes C = A*B as (B'*A')' since forming the MPIDense*MPIAIJ product directly is not supported
4846 
4847                n                       p                          p
4848         (              )       (              )         (                  )
4849       m (      A       )  *  n (       B      )   =   m (         C        )
4850         (              )       (              )         (                  )
4851 
4852 */
4853 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4854 {
4855   PetscErrorCode     ierr;
4856   Mat                At,Bt,Ct;
4857 
4858   PetscFunctionBegin;
4859   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4860   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4861   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4862   ierr = MatDestroy(At);CHKERRQ(ierr);
4863   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4864   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4865   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4866   PetscFunctionReturn(0);
4867 }
4868 
4869 #undef __FUNCT__
4870 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4871 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4872 {
4873   PetscErrorCode ierr;
4874   PetscInt       m=A->rmap->n,n=B->cmap->n;
4875   Mat            Cmat;
4876 
4877   PetscFunctionBegin;
4878   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4879   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4880   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4881   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4882   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4883   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4884   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4885   *C   = Cmat;
4886   PetscFunctionReturn(0);
4887 }
4888 
4889 /* ----------------------------------------------------------------*/
4890 #undef __FUNCT__
4891 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4892 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4893 {
4894   PetscErrorCode ierr;
4895 
4896   PetscFunctionBegin;
4897   if (scall == MAT_INITIAL_MATRIX){
4898     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4899   }
4900   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4901   PetscFunctionReturn(0);
4902 }
4903 
4904 EXTERN_C_BEGIN
4905 #if defined(PETSC_HAVE_MUMPS)
4906 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4907 #endif
4908 #if defined(PETSC_HAVE_SUPERLU_DIST)
4909 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4910 #endif
4911 #if defined(PETSC_HAVE_SPOOLES)
4912 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4913 #endif
4914 EXTERN_C_END
4915 
4916 /*MC
4917    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4918 
4919    Options Database Keys:
4920 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4921 
4922   Level: beginner
4923 
4924 .seealso: MatCreateMPIAIJ()
4925 M*/
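
/*
   Example (a sketch, not from the original source; M, N, and the preallocation counts are
   placeholders): selecting the type programmatically instead of via -mat_type mpiaij.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/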
4926 
4927 EXTERN_C_BEGIN
4928 #undef __FUNCT__
4929 #define __FUNCT__ "MatCreate_MPIAIJ"
4930 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4931 {
4932   Mat_MPIAIJ     *b;
4933   PetscErrorCode ierr;
4934   PetscMPIInt    size;
4935 
4936   PetscFunctionBegin;
4937   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4938 
4939   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4940   B->data         = (void*)b;
4941   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4942   B->rmap->bs      = 1;
4943   B->assembled    = PETSC_FALSE;
4944   B->mapping      = 0;
4945 
4946   B->insertmode      = NOT_SET_VALUES;
4947   b->size            = size;
4948   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4949 
4950   /* build cache for off array entries formed */
4951   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4952   b->donotstash  = PETSC_FALSE;
4953   b->colmap      = 0;
4954   b->garray      = 0;
4955   b->roworiented = PETSC_TRUE;
4956 
4957   /* stuff used for matrix vector multiply */
4958   b->lvec      = PETSC_NULL;
4959   b->Mvctx     = PETSC_NULL;
4960 
4961   /* stuff for MatGetRow() */
4962   b->rowindices   = 0;
4963   b->rowvalues    = 0;
4964   b->getrowactive = PETSC_FALSE;
4965 
4966 #if defined(PETSC_HAVE_SPOOLES)
4967   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
4968                                      "MatGetFactor_mpiaij_spooles",
4969                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
4970 #endif
4971 #if defined(PETSC_HAVE_MUMPS)
4972   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
4973                                      "MatGetFactor_mpiaij_mumps",
4974                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
4975 #endif
4976 #if defined(PETSC_HAVE_SUPERLU_DIST)
4977   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
4978                                      "MatGetFactor_mpiaij_superlu_dist",
4979                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
4980 #endif
4981   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4982                                      "MatStoreValues_MPIAIJ",
4983                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4984   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4985                                      "MatRetrieveValues_MPIAIJ",
4986                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4987   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4988 				     "MatGetDiagonalBlock_MPIAIJ",
4989                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4990   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4991 				     "MatIsTranspose_MPIAIJ",
4992 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4993   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4994 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4995 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4996   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4997 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4998 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4999   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5000 				     "MatDiagonalScaleLocal_MPIAIJ",
5001 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5002   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5003                                      "MatConvert_MPIAIJ_MPICSRPERM",
5004                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5005   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5006                                      "MatConvert_MPIAIJ_MPICRL",
5007                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5008   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5009                                      "MatMatMult_MPIDense_MPIAIJ",
5010                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5011   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5012                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5013                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5015                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5016                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5017   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5018   PetscFunctionReturn(0);
5019 }
5020 EXTERN_C_END
5021 
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5024 /*@
5025      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5026          and "off-diagonal" part of the matrix in CSR format.
5027 
5028    Collective on MPI_Comm
5029 
5030    Input Parameters:
5031 +  comm - MPI communicator
5032 .  m - number of local rows (Cannot be PETSC_DECIDE)
5033 .  n - This value should be the same as the local size used in creating the
5034        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5035        calculated if N is given). For square matrices n is almost always m.
5036 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5037 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5038 .   i - row indices for "diagonal" portion of matrix
5039 .   j - column indices
5040 .   a - matrix values
5041 .   oi - row indices for "off-diagonal" portion of matrix
5042 .   oj - column indices
5043 -   oa - matrix values
5044 
5045    Output Parameter:
5046 .   mat - the matrix
5047 
5048    Level: advanced
5049 
5050    Notes:
5051        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5052 
5053        The i and j indices are 0 based
5054 
5055        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5056 
5057 
5058 .keywords: matrix, aij, compressed row, sparse, parallel
5059 
5060 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5061           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5062 @*/
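/*
   Usage sketch (illustrative only; the array contents, mlocal, and nlocal are placeholders):
   the six arrays are used directly rather than copied, so they must remain valid for the
   lifetime of the matrix.

     PetscInt    di[] = {...},dj[] = {...},oi[] = {...},oj[] = {...};
     PetscScalar da[] = {...},oa[] = {...};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE,
                                           di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
*/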
5063 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5064 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5065 {
5066   PetscErrorCode ierr;
5067   Mat_MPIAIJ     *maij;
5068 
5069   PetscFunctionBegin;
5070   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5071   if (i[0]) {
5072     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5073   }
5074   if (oi[0]) {
5075     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5076   }
5077   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5078   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5079   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5080   maij = (Mat_MPIAIJ*) (*mat)->data;
5081   maij->donotstash     = PETSC_TRUE;
5082   (*mat)->preallocated = PETSC_TRUE;
5083 
5084   (*mat)->rmap->bs = (*mat)->cmap->bs = 1;
5085   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5086   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5087 
5088   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5089   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5090 
5091   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5092   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5093   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5094   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5095 
5096   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5097   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5098   PetscFunctionReturn(0);
5099 }
5100 
5101 /*
5102     Special version for direct calls from Fortran
5103 */
5104 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5105 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5106 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5107 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5108 #endif
5109 
5110 /* Change these macros so can be used in void function */
5111 #undef CHKERRQ
5112 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5113 #undef SETERRQ2
5114 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5115 #undef SETERRQ
5116 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5117 
5118 EXTERN_C_BEGIN
5119 #undef __FUNCT__
5120 #define __FUNCT__ "matsetvaluesmpiaij_"
5121 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5122 {
5123   Mat             mat = *mmat;
5124   PetscInt        m = *mm, n = *mn;
5125   InsertMode      addv = *maddv;
5126   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5127   PetscScalar     value;
5128   PetscErrorCode  ierr;
5129 
5130   MatPreallocated(mat);
5131   if (mat->insertmode == NOT_SET_VALUES) {
5132     mat->insertmode = addv;
5133   }
5134 #if defined(PETSC_USE_DEBUG)
5135   else if (mat->insertmode != addv) {
5136     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5137   }
5138 #endif
5139   {
5140   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5141   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5142   PetscTruth      roworiented = aij->roworiented;
5143 
5144   /* Some Variables required in the macro */
5145   Mat             A = aij->A;
5146   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5147   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5148   MatScalar       *aa = a->a;
5149   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5150   Mat             B = aij->B;
5151   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5152   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5153   MatScalar       *ba = b->a;
5154 
5155   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5156   PetscInt        nonew = a->nonew;
5157   MatScalar       *ap1,*ap2;
5158 
5159   PetscFunctionBegin;
5160   for (i=0; i<m; i++) {
5161     if (im[i] < 0) continue;
5162 #if defined(PETSC_USE_DEBUG)
5163     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5164 #endif
5165     if (im[i] >= rstart && im[i] < rend) {
5166       row      = im[i] - rstart;
5167       lastcol1 = -1;
5168       rp1      = aj + ai[row];
5169       ap1      = aa + ai[row];
5170       rmax1    = aimax[row];
5171       nrow1    = ailen[row];
5172       low1     = 0;
5173       high1    = nrow1;
5174       lastcol2 = -1;
5175       rp2      = bj + bi[row];
5176       ap2      = ba + bi[row];
5177       rmax2    = bimax[row];
5178       nrow2    = bilen[row];
5179       low2     = 0;
5180       high2    = nrow2;
5181 
5182       for (j=0; j<n; j++) {
5183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5185         if (in[j] >= cstart && in[j] < cend){
5186           col = in[j] - cstart;
5187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5188         } else if (in[j] < 0) continue;
5189 #if defined(PETSC_USE_DEBUG)
5190         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5191 #endif
5192         else {
5193           if (mat->was_assembled) {
5194             if (!aij->colmap) {
5195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5196             }
5197 #if defined (PETSC_USE_CTABLE)
5198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5199 	    col--;
5200 #else
5201             col = aij->colmap[in[j]] - 1;
5202 #endif
5203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5205               col =  in[j];
5206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5207               B = aij->B;
5208               b = (Mat_SeqAIJ*)B->data;
5209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5210               rp2      = bj + bi[row];
5211               ap2      = ba + bi[row];
5212               rmax2    = bimax[row];
5213               nrow2    = bilen[row];
5214               low2     = 0;
5215               high2    = nrow2;
5216               bm       = aij->B->rmap->n;
5217               ba = b->a;
5218             }
5219           } else col = in[j];
5220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5221         }
5222       }
5223     } else {
5224       if (!aij->donotstash) {
5225         if (roworiented) {
5226           if (ignorezeroentries && v[i*n] == 0.0) continue;
5227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5228         } else {
5229           if (ignorezeroentries && v[i] == 0.0) continue;
5230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5231         }
5232       }
5233     }
5234   }}
5235   PetscFunctionReturnVoid();
5236 }
5237 EXTERN_C_END
5238 
5239